27 PRAGMA48(GCC diagnostic ignored
"-Wshadow")
28 #ifndef SDL_BIG_ENDIAN
29 #include <SDL_endian.h>
35 #include <immintrin.h>
43 STD_VECTOR<DyeColor>::const_iterator it_end =
mColors.end();
44 const size_t sz =
mColors.size();
45 if (sz == 0U || pixels ==
nullptr)
50 for (
const uint32_t *
const p_end = pixels +
CAST_SIZE(bufSize);
54 uint8_t *
const p =
reinterpret_cast<uint8_t *
>(pixels);
55 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
56 const unsigned int data = (*pixels) & 0x00ffffffU;
59 const unsigned int data = (*pixels) & 0xffffff00U;
62 STD_VECTOR<DyeColor>::const_iterator it =
mColors.begin();
69 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
70 const unsigned int coldata = (col.
value[2] << 16U)
74 const unsigned int coldata = (col.
value[2] << 8U)
75 | (col.
value[1] << 16U) | (col.
value[0] << 24U);
104 STD_VECTOR<DyeColor>::const_iterator it_end =
mColors.end();
105 const size_t sz =
mColors.size();
106 if (sz == 0U || pixels ==
nullptr)
110 const int mod = bufSize % 8;
111 const int bufEnd = bufSize - mod;
113 for (
int ptr = 0; ptr < bufEnd; ptr += 4)
115 __m128i mask = _mm_set1_epi32(0xffffff00U);
117 __m128i
base = _mm_loadu_si128(
reinterpret_cast<__m128i*
>(
120 STD_VECTOR<DyeColor>::const_iterator it =
mColors.begin();
127 __m128i base2 = _mm_and_si128(mask,
base);
128 __m128i newMask = _mm_set1_epi32(col2.
valueS);
129 __m128i cmpMask = _mm_set1_epi32(col.
valueS);
130 __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
131 cmpRes = _mm_and_si128(mask, cmpRes);
132 __m128i srcAnd = _mm_andnot_si128(cmpRes,
base);
133 __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
134 base = _mm_or_si128(srcAnd, dstAnd);
138 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(&pixels[ptr]),
base);
142 for (
int ptr = bufSize - mod; ptr < bufSize; ptr ++)
144 uint8_t *
const p =
reinterpret_cast<uint8_t *
>(&pixels[ptr]);
145 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
146 const unsigned int data = pixels[ptr] & 0x00ffffffU;
149 const unsigned int data = pixels[ptr] & 0xffffff00U;
152 STD_VECTOR<DyeColor>::const_iterator it =
mColors.begin();
159 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
160 const unsigned int coldata = (col.
value[2] << 16U)
164 const unsigned int coldata = (col.
value[2] << 8U)
165 | (col.
value[1] << 16U) | (col.
value[0] << 24U);
170 p[3] = col2.
value[0];
171 p[2] = col2.
value[1];
172 p[1] = col2.
value[2];
185 STD_VECTOR<DyeColor>::const_iterator it_end =
mColors.end();
186 const size_t sz =
mColors.size();
187 if (sz == 0U || pixels ==
nullptr)
191 const int mod = bufSize % 8;
192 const int bufEnd = bufSize - mod;
194 for (
int ptr = 0; ptr < bufEnd; ptr += 8)
196 __m256i mask = _mm256_set1_epi32(0xffffff00U);
198 __m256i
base = _mm256_loadu_si256(
reinterpret_cast<__m256i*
>(
201 STD_VECTOR<DyeColor>::const_iterator it =
mColors.begin();
208 __m256i base2 = _mm256_and_si256(mask,
base);
209 __m256i newMask = _mm256_set1_epi32(col2.
valueS);
210 __m256i cmpMask = _mm256_set1_epi32(col.
valueS);
211 __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
212 cmpRes = _mm256_and_si256(mask, cmpRes);
213 __m256i srcAnd = _mm256_andnot_si256(cmpRes,
base);
214 __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
215 base = _mm256_or_si256(srcAnd, dstAnd);
219 _mm256_storeu_si256(
reinterpret_cast<__m256i*
>(&pixels[ptr]),
base);
223 for (
int ptr = bufSize - mod; ptr < bufSize; ptr ++)
225 uint8_t *
const p =
reinterpret_cast<uint8_t *
>(&pixels[ptr]);
226 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
227 const unsigned int data = pixels[ptr] & 0x00ffffffU;
230 const unsigned int data = pixels[ptr] & 0xffffff00U;
233 STD_VECTOR<DyeColor>::const_iterator it =
mColors.begin();
240 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
241 const unsigned int coldata = (col.
value[2] << 16U)
245 const unsigned int coldata = (col.
value[2] << 8U)
246 | (col.
value[1] << 16U) | (col.
value[0] << 24U);
251 p[3] = col2.
value[0];
252 p[2] = col2.
value[1];
253 p[1] = col2.
value[2];
void replaceSColorDefault(uint32_t *pixels, const int bufSize) const
union EAthena::ItemFlags __attribute__((packed))
std::map< std::string, DyeColor > mColors