ManaPlus
dyepalette_replaceaoglcolor.cpp
Go to the documentation of this file.
1 /*
2  * The ManaPlus Client
3  * Copyright (C) 2007-2009 The Mana World Development Team
4  * Copyright (C) 2009-2010 The Mana Developers
5  * Copyright (C) 2011-2019 The ManaPlus Developers
6  * Copyright (C) 2019-2021 Andrei Karas
7  *
8  * This file is part of The ManaPlus Client.
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program. If not, see <http://www.gnu.org/licenses/>.
22  */
23 
24 #ifdef USE_OPENGL
25 
27 
28 PRAGMA48(GCC diagnostic push)
29 PRAGMA48(GCC diagnostic ignored "-Wshadow")
30 #ifndef SDL_BIG_ENDIAN
31 #include <SDL_endian.h>
32 #endif // SDL_BIG_ENDIAN
33 PRAGMA48(GCC diagnostic pop)
34 
35 #ifdef SIMD_SUPPORTED
36 // avx2
37 #include <immintrin.h>
38 #endif // SIMD_SUPPORTED
39 
40 #include "debug.h"
41 
43  const int bufSize) const restrict2
44 {
45  STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
46  const size_t sz = mColors.size();
47  if (sz == 0U || pixels == nullptr)
48  return;
49  if ((sz % 2) != 0U)
50  -- it_end;
51 
52  for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
53  pixels != p_end;
54  ++pixels)
55  {
56  uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
57  const unsigned int data = *pixels;
58 
59  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
60  while (it != it_end)
61  {
62  const DyeColor &col = *it;
63  ++ it;
64  const DyeColor &col2 = *it;
65 
66 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
67  const unsigned int coldata = (col.value[0] << 24U)
68  | (col.value[1] << 16U)
69  | (col.value[2] << 8U)
70  | col.value[3];
71 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
72 
73  const unsigned int coldata = (col.value[0])
74  | (col.value[1] << 8U)
75  | (col.value[2] << 16U)
76  | (col.value[3] << 24U);
77 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
78 
79  if (data == coldata)
80  {
81  p[0] = col2.value[0];
82  p[1] = col2.value[1];
83  p[2] = col2.value[2];
84  p[3] = col2.value[3];
85  break;
86  }
87 
88  ++ it;
89  }
90  }
91 }
92 
93 #ifdef SIMD_SUPPORTED
94 /*
95 static void print256(const char *const text, const __m256i &val);
96 static void print256(const char *const text, const __m256i &val)
97 {
98  printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
99 }
100 */
101 
102 __attribute__ ((target ("sse2")))
103 void DyePalette::replaceAOGLColorSse2(uint32_t *restrict pixels,
104  const int bufSize) const restrict2
105 {
106  STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
107  const size_t sz = mColors.size();
108  if (sz == 0U || pixels == nullptr)
109  return;
110  if ((sz % 2) != 0U)
111  -- it_end;
112 
113  const int mod = bufSize % 4;
114  const int bufEnd = bufSize - mod;
115 
116  for (int ptr = 0; ptr < bufEnd; ptr += 4)
117  {
118 // __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(
119 // &pixels[ptr]));
120  __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
121  &pixels[ptr]));
122 
123  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
124  while (it != it_end)
125  {
126  const DyeColor &col = *it;
127  ++ it;
128  const DyeColor &col2 = *it;
129 
130  __m128i newMask = _mm_set1_epi32(col2.valueAOgl);
131  __m128i cmpMask = _mm_set1_epi32(col.valueAOgl);
132  __m128i cmpRes = _mm_cmpeq_epi32(base, cmpMask);
133  __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
134  __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
135  base = _mm_or_si128(srcAnd, dstAnd);
136 
137  ++ it;
138  }
139 // _mm_store_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
140  _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
141  }
142 
143  for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
144  {
145  uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
146  const unsigned int data = pixels[ptr];
147 
148  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
149  while (it != it_end)
150  {
151  const DyeColor &col = *it;
152  ++ it;
153  const DyeColor &col2 = *it;
154 
155 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
156  const unsigned int coldata = (col.value[0] << 24U)
157  | (col.value[1] << 16U)
158  | (col.value[2] << 8U)
159  | col.value[3];
160 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
161 
162  const unsigned int coldata = (col.value[0])
163  | (col.value[1] << 8U)
164  | (col.value[2] << 16U)
165  | (col.value[3] << 24U);
166 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
167 
168  if (data == coldata)
169  {
170  p[0] = col2.value[0];
171  p[1] = col2.value[1];
172  p[2] = col2.value[2];
173  p[3] = col2.value[3];
174  break;
175  }
176 
177  ++ it;
178  }
179  }
180 }
181 
182 __attribute__ ((target ("avx2")))
183 void DyePalette::replaceAOGLColorAvx2(uint32_t *restrict pixels,
184  const int bufSize) const restrict2
185 {
186  STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
187  const size_t sz = mColors.size();
188  if (sz == 0U || pixels == nullptr)
189  return;
190  if ((sz % 2) != 0U)
191  -- it_end;
192 
193  const int mod = bufSize % 8;
194  const int bufEnd = bufSize - mod;
195 
196  for (int ptr = 0; ptr < bufEnd; ptr += 8)
197  {
198 // __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(
199 // &pixels[ptr]));
200  __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
201  &pixels[ptr]));
202 
203  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
204  while (it != it_end)
205  {
206  const DyeColor &col = *it;
207  ++ it;
208  const DyeColor &col2 = *it;
209 
210  __m256i newMask = _mm256_set1_epi32(col2.valueAOgl);
211  __m256i cmpMask = _mm256_set1_epi32(col.valueAOgl);
212  __m256i cmpRes = _mm256_cmpeq_epi32(base, cmpMask);
213  __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
214  __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
215  base = _mm256_or_si256(srcAnd, dstAnd);
216 
217  ++ it;
218  }
219 // _mm256_store_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
220 // base);
221  _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
222  base);
223  }
224 
225  for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
226  {
227  uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
228  const unsigned int data = pixels[ptr];
229 
230  STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
231  while (it != it_end)
232  {
233  const DyeColor &col = *it;
234  ++ it;
235  const DyeColor &col2 = *it;
236 
237 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
238  const unsigned int coldata = (col.value[0] << 24U)
239  | (col.value[1] << 16U)
240  | (col.value[2] << 8U)
241  | col.value[3];
242 #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
243 
244  const unsigned int coldata = (col.value[0])
245  | (col.value[1] << 8U)
246  | (col.value[2] << 16U)
247  | (col.value[3] << 24U);
248 #endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
249 
250  if (data == coldata)
251  {
252  p[0] = col2.value[0];
253  p[1] = col2.value[1];
254  p[2] = col2.value[2];
255  p[3] = col2.value[3];
256  break;
257  }
258  ++ it;
259  }
260  }
261 }
262 
263 #endif // SIMD_SUPPORTED
264 #endif // USE_OPENGL
#define CAST_SIZE
Definition: cast.h:34
void replaceAOGLColorDefault(uint32_t *pixels, const int bufSize) const
#define restrict
Definition: localconsts.h:165
#define restrict2
Definition: localconsts.h:166
#define PRAGMA48(str)
Definition: localconsts.h:199
uint32_t data
union EAthena::ItemFlags __attribute__((packed))
std::map< std::string, DyeColor > mColors
Definition: palettedb.cpp:37
uint8_t value[4]
Definition: dyecolor.h:77
uint32_t valueAOgl
Definition: dyecolor.h:78