ManaPlus
dyepalette_replacesoglcolor.cpp
Go to the documentation of this file.
1 /*
2  * The ManaPlus Client
3  * Copyright (C) 2007-2009 The Mana World Development Team
4  * Copyright (C) 2009-2010 The Mana Developers
5  * Copyright (C) 2011-2019 The ManaPlus Developers
6  * Copyright (C) 2019-2021 Andrei Karas
7  *
8  * This file is part of The ManaPlus Client.
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program. If not, see <http://www.gnu.org/licenses/>.
22  */
23 
24 #ifdef USE_OPENGL
25 
27 
28 PRAGMA48(GCC diagnostic push)
29 PRAGMA48(GCC diagnostic ignored "-Wshadow")
30 #ifndef SDL_BIG_ENDIAN
31 #include <SDL_endian.h>
32 #endif // SDL_BIG_ENDIAN
33 PRAGMA48(GCC diagnostic pop)
34 
35 #ifdef SIMD_SUPPORTED
36 // avx2
37 #include <immintrin.h>
38 #endif // SIMD_SUPPORTED
39 
40 #include "debug.h"
41 
43  const int bufSize) const restrict2
44 {
45  STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
46  const size_t sz = mColors.size();
47  if ((sz == 0U) || (pixels == nullptr))
48  return;
49  if ((sz % 2) != 0U)
50  -- it_end;
51 
52  for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
53  pixels != p_end;
54  ++pixels)
55  {
56  uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
57 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
58  const unsigned int data = (*pixels) & 0xffffff00;
59 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
60 
61  const unsigned int data = (*pixels) & 0x00ffffff;
62 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
63 
64  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
65  while (it != it_end)
66  {
67  const DyeColor &col = *it;
68  ++ it;
69  const DyeColor &col2 = *it;
70 
71 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
72  const unsigned int coldata = (col.value[0] << 24)
73  | (col.value[1] << 16) | (col.value[2] << 8);
74 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
75 
76  const unsigned int coldata = (col.value[0])
77  | (col.value[1] << 8) | (col.value[2] << 16);
78 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
79 
80  if (data == coldata)
81  {
82  p[0] = col2.value[0];
83  p[1] = col2.value[1];
84  p[2] = col2.value[2];
85  break;
86  }
87 
88  ++ it;
89  }
90  }
91 }
92 
93 #ifdef SIMD_SUPPORTED
94 /*
95 static void print256(const char *const text, const __m256i &val);
96 static void print256(const char *const text, const __m256i &val)
97 {
98  printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
99 }
100 */
101 
102 __attribute__ ((target ("sse2")))
103 void DyePalette::replaceSOGLColorSse2(uint32_t *restrict pixels,
104  const int bufSize) const restrict2
105 {
106  STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
107  const size_t sz = mColors.size();
108  if ((sz == 0U) || (pixels == nullptr))
109  return;
110  if ((sz % 2) != 0U)
111  -- it_end;
112 
113  const int mod = bufSize % 4;
114  const int bufEnd = bufSize - mod;
115 
116  for (int ptr = 0; ptr < bufEnd; ptr += 4)
117  {
118  __m128i mask = _mm_set1_epi32(0x00ffffff);
119 // __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(
120 // &pixels[ptr]));
121  __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
122  &pixels[ptr]));
123 
124  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
125  while (it != it_end)
126  {
127  const DyeColor &col = *it;
128  ++ it;
129  const DyeColor &col2 = *it;
130 
131  __m128i base2 = _mm_and_si128(mask, base);
132  __m128i newMask = _mm_set1_epi32(col2.valueSOgl);
133  __m128i cmpMask = _mm_set1_epi32(col.valueSOgl);
134  __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
135  cmpRes = _mm_and_si128(mask, cmpRes);
136  __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
137  __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
138  base = _mm_or_si128(srcAnd, dstAnd);
139  ++ it;
140  }
141 // _mm_store_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
142  _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
143  }
144 
145  for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
146  {
147  uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
148 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
149  const unsigned int data = pixels[ptr] & 0xffffff00;
150 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
151 
152  const unsigned int data = pixels[ptr] & 0x00ffffff;
153 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
154 
155  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
156  while (it != it_end)
157  {
158  const DyeColor &col = *it;
159  ++ it;
160  const DyeColor &col2 = *it;
161 
162 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
163  const unsigned int coldata = (col.value[0] << 24)
164  | (col.value[1] << 16) | (col.value[2] << 8);
165 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
166 
167  const unsigned int coldata = (col.value[0])
168  | (col.value[1] << 8) | (col.value[2] << 16);
169 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
170 
171  if (data == coldata)
172  {
173  p[0] = col2.value[0];
174  p[1] = col2.value[1];
175  p[2] = col2.value[2];
176  break;
177  }
178 
179  ++ it;
180  }
181  }
182 }
183 
184 __attribute__ ((target ("avx2")))
185 void DyePalette::replaceSOGLColorAvx2(uint32_t *restrict pixels,
186  const int bufSize) const restrict2
187 {
188  STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
189  const size_t sz = mColors.size();
190  if ((sz == 0U) || (pixels == nullptr))
191  return;
192  if ((sz % 2) != 0U)
193  -- it_end;
194 
195  const int mod = bufSize % 8;
196  const int bufEnd = bufSize - mod;
197 
198  for (int ptr = 0; ptr < bufEnd; ptr += 8)
199  {
200  __m256i mask = _mm256_set1_epi32(0x00ffffff);
201 // __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(
202 // &pixels[ptr]));
203  __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
204  &pixels[ptr]));
205 
206  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
207  while (it != it_end)
208  {
209  const DyeColor &col = *it;
210  ++ it;
211  const DyeColor &col2 = *it;
212 
213  __m256i base2 = _mm256_and_si256(mask, base);
214  __m256i newMask = _mm256_set1_epi32(col2.valueSOgl);
215  __m256i cmpMask = _mm256_set1_epi32(col.valueSOgl);
216  __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
217  cmpRes = _mm256_and_si256(mask, cmpRes);
218  __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
219  __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
220  base = _mm256_or_si256(srcAnd, dstAnd);
221  ++ it;
222  }
223 // _mm256_store_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
224 // base);
225  _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
226  base);
227  }
228 
229  for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
230  {
231  uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
232 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
233  const unsigned int data = pixels[ptr] & 0xffffff00;
234 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
235 
236  const unsigned int data = pixels[ptr] & 0x00ffffff;
237 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
238 
239  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
240  while (it != it_end)
241  {
242  const DyeColor &col = *it;
243  ++ it;
244  const DyeColor &col2 = *it;
245 
246 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
247  const unsigned int coldata = (col.value[0] << 24)
248  | (col.value[1] << 16) | (col.value[2] << 8);
249 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
250 
251  const unsigned int coldata = (col.value[0])
252  | (col.value[1] << 8) | (col.value[2] << 16);
253 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
254 
255  if (data == coldata)
256  {
257  p[0] = col2.value[0];
258  p[1] = col2.value[1];
259  p[2] = col2.value[2];
260  break;
261  }
262 
263  ++ it;
264  }
265  }
266 }
267 
268 #endif // SIMD_SUPPORTED
269 #endif // USE_OPENGL
#define CAST_SIZE
Definition: cast.h:34
void replaceSOGLColorDefault(uint32_t *pixels, const int bufSize) const
#define restrict
Definition: localconsts.h:165
#define restrict2
Definition: localconsts.h:166
#define PRAGMA48(str)
Definition: localconsts.h:199
uint32_t data
union EAthena::ItemFlags __attribute__((packed))
std::map< std::string, DyeColor > mColors
Definition: palettedb.cpp:37
uint32_t valueSOgl
Definition: dyecolor.h:82
uint8_t value[4]
Definition: dyecolor.h:77