ManaPlus
dyepalette_replacescolor.cpp
Go to the documentation of this file.
1 /*
2  * The ManaPlus Client
3  * Copyright (C) 2007-2009 The Mana World Development Team
4  * Copyright (C) 2009-2010 The Mana Developers
5  * Copyright (C) 2011-2019 The ManaPlus Developers
6  * Copyright (C) 2019-2021 Andrei Karas
7  *
8  * This file is part of The ManaPlus Client.
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program. If not, see <http://www.gnu.org/licenses/>.
22  */
23 
25 
26 PRAGMA48(GCC diagnostic push)
27 PRAGMA48(GCC diagnostic ignored "-Wshadow")
28 #ifndef SDL_BIG_ENDIAN
29 #include <SDL_endian.h>
30 #endif // SDL_BYTEORDER
31 PRAGMA48(GCC diagnostic pop)
32 
33 #ifdef SIMD_SUPPORTED
34 // avx2
35 #include <immintrin.h>
36 #endif // SIMD_SUPPORTED
37 
38 #include "debug.h"
39 
41  const int bufSize) const restrict2
42 {
43  STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
44  const size_t sz = mColors.size();
45  if (sz == 0U || pixels == nullptr)
46  return;
47  if ((sz % 2) != 0U)
48  -- it_end;
49 
50  for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
51  pixels != p_end;
52  ++ pixels)
53  {
54  uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
55 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
56  const unsigned int data = (*pixels) & 0x00ffffffU;
57 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
58 
59  const unsigned int data = (*pixels) & 0xffffff00U;
60 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
61 
62  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
63  while (it != it_end)
64  {
65  const DyeColor &col = *it;
66  ++ it;
67  const DyeColor &col2 = *it;
68 
69 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
70  const unsigned int coldata = (col.value[2] << 16U)
71  | (col.value[1] << 8U) | (col.value[0]);
72 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
73 
74  const unsigned int coldata = (col.value[2] << 8U)
75  | (col.value[1] << 16U) | (col.value[0] << 24U);
76 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
77 
78  if (data == coldata)
79  {
80  p[3] = col2.value[0];
81  p[2] = col2.value[1];
82  p[1] = col2.value[2];
83  break;
84  }
85 
86  ++ it;
87  }
88  }
89 }
90 
91 #ifdef SIMD_SUPPORTED
92 /*
93 static void print256(const char *const text, const __m256i &val);
94 static void print256(const char *const text, const __m256i &val)
95 {
96  printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
97 }
98 */
99 
100 __attribute__ ((target ("sse2")))
101 void DyePalette::replaceSColorSse2(uint32_t *restrict pixels,
102  const int bufSize) const restrict2
103 {
104  STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
105  const size_t sz = mColors.size();
106  if (sz == 0U || pixels == nullptr)
107  return;
108  if ((sz % 2) != 0U)
109  -- it_end;
110  const int mod = bufSize % 8;
111  const int bufEnd = bufSize - mod;
112 
113  for (int ptr = 0; ptr < bufEnd; ptr += 4)
114  {
115  __m128i mask = _mm_set1_epi32(0xffffff00U);
116 // __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(pixels));
117  __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
118  &pixels[ptr]));
119 
120  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
121  while (it != it_end)
122  {
123  const DyeColor &col = *it;
124  ++ it;
125  const DyeColor &col2 = *it;
126 
127  __m128i base2 = _mm_and_si128(mask, base);
128  __m128i newMask = _mm_set1_epi32(col2.valueS);
129  __m128i cmpMask = _mm_set1_epi32(col.valueS);
130  __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
131  cmpRes = _mm_and_si128(mask, cmpRes);
132  __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
133  __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
134  base = _mm_or_si128(srcAnd, dstAnd);
135  ++ it;
136  }
137 // _mm_store_si128(reinterpret_cast<__m128i*>(pixels), base);
138  _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
139  }
140 
141  // complete end without simd
142  for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
143  {
144  uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
145 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
146  const unsigned int data = pixels[ptr] & 0x00ffffffU;
147 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
148 
149  const unsigned int data = pixels[ptr] & 0xffffff00U;
150 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
151 
152  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
153  while (it != it_end)
154  {
155  const DyeColor &col = *it;
156  ++ it;
157  const DyeColor &col2 = *it;
158 
159 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
160  const unsigned int coldata = (col.value[2] << 16U)
161  | (col.value[1] << 8U) | (col.value[0]);
162 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
163 
164  const unsigned int coldata = (col.value[2] << 8U)
165  | (col.value[1] << 16U) | (col.value[0] << 24U);
166 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
167 
168  if (data == coldata)
169  {
170  p[3] = col2.value[0];
171  p[2] = col2.value[1];
172  p[1] = col2.value[2];
173  break;
174  }
175 
176  ++ it;
177  }
178  }
179 }
180 
181 __attribute__ ((target ("avx2")))
182 void DyePalette::replaceSColorAvx2(uint32_t *restrict pixels,
183  const int bufSize) const restrict2
184 {
185  STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
186  const size_t sz = mColors.size();
187  if (sz == 0U || pixels == nullptr)
188  return;
189  if ((sz % 2) != 0U)
190  -- it_end;
191  const int mod = bufSize % 8;
192  const int bufEnd = bufSize - mod;
193 
194  for (int ptr = 0; ptr < bufEnd; ptr += 8)
195  {
196  __m256i mask = _mm256_set1_epi32(0xffffff00U);
197 // __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
198  __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
199  &pixels[ptr]));
200 
201  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
202  while (it != it_end)
203  {
204  const DyeColor &col = *it;
205  ++ it;
206  const DyeColor &col2 = *it;
207 
208  __m256i base2 = _mm256_and_si256(mask, base);
209  __m256i newMask = _mm256_set1_epi32(col2.valueS);
210  __m256i cmpMask = _mm256_set1_epi32(col.valueS);
211  __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
212  cmpRes = _mm256_and_si256(mask, cmpRes);
213  __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
214  __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
215  base = _mm256_or_si256(srcAnd, dstAnd);
216  ++ it;
217  }
218 // _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
219  _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
220  }
221 
222  // complete end without simd
223  for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
224  {
225  uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
226 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
227  const unsigned int data = pixels[ptr] & 0x00ffffffU;
228 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
229 
230  const unsigned int data = pixels[ptr] & 0xffffff00U;
231 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
232 
233  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
234  while (it != it_end)
235  {
236  const DyeColor &col = *it;
237  ++ it;
238  const DyeColor &col2 = *it;
239 
240 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
241  const unsigned int coldata = (col.value[2] << 16U)
242  | (col.value[1] << 8U) | (col.value[0]);
243 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
244 
245  const unsigned int coldata = (col.value[2] << 8U)
246  | (col.value[1] << 16U) | (col.value[0] << 24U);
247 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
248 
249  if (data == coldata)
250  {
251  p[3] = col2.value[0];
252  p[2] = col2.value[1];
253  p[1] = col2.value[2];
254  break;
255  }
256 
257  ++ it;
258  }
259  }
260 }
261 
262 #endif // SIMD_SUPPORTED
#define CAST_SIZE
Definition: cast.h:34
void replaceSColorDefault(uint32_t *pixels, const int bufSize) const
#define restrict
Definition: localconsts.h:165
#define restrict2
Definition: localconsts.h:166
#define PRAGMA48(str)
Definition: localconsts.h:199
uint32_t data
union EAthena::ItemFlags __attribute__((packed))
std::map< std::string, DyeColor > mColors
Definition: palettedb.cpp:37
uint8_t value[4]
Definition: dyecolor.h:77
uint32_t valueS
Definition: dyecolor.h:81