GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replacescolor.cpp Lines: 99 99 100.0 %
Date: 2017-11-29 Branches: 32 38 84.2 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2017  The ManaPlus Developers
6
 *
7
 *  This file is part of The ManaPlus Client.
8
 *
9
 *  This program is free software; you can redistribute it and/or modify
10
 *  it under the terms of the GNU General Public License as published by
11
 *  the Free Software Foundation; either version 2 of the License, or
12
 *  any later version.
13
 *
14
 *  This program is distributed in the hope that it will be useful,
15
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 *  GNU General Public License for more details.
18
 *
19
 *  You should have received a copy of the GNU General Public License
20
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#include "resources/dye/dyepalette.h"
24
25
PRAGMA48(GCC diagnostic push)
26
PRAGMA48(GCC diagnostic ignored "-Wshadow")
27
#ifndef SDL_BIG_ENDIAN
28
#include <SDL_endian.h>
29
#endif  // SDL_BIG_ENDIAN
30
PRAGMA48(GCC diagnostic pop)
31
32
#ifdef SIMD_SUPPORTED
33
// avx2
34
#include <immintrin.h>
35
#endif  // SIMD_SUPPORTED
36
37
#include "debug.h"
38
39
22
/**
 * Scalar (non-SIMD) colour replacement over a buffer of 32 bit pixels.
 *
 * mColors is read as consecutive (source, destination) pairs; when it holds
 * an odd number of entries the trailing unpaired one is ignored. For every
 * pixel the three colour bytes selected by the byte-order mask are compared
 * with each source colour in order; on the first match bytes p[1]..p[3] are
 * overwritten with the destination colour and the search for that pixel
 * stops. Byte p[0] is never written.
 *
 * @param pixels  pixel buffer, modified in place; nullptr is a no-op.
 * @param bufSize number of pixels in the buffer; values <= 0 are a no-op.
 */
void DyePalette::replaceSColorDefault(uint32_t *restrict pixels,
                                      const int bufSize) const restrict2
{
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
    const size_t sz = mColors.size();
    if (sz == 0u || pixels == nullptr)
        return;
    // Colours are consumed two at a time, so drop an unpaired last entry.
    if ((sz % 2) != 0u)
        -- it_end;

    // Both build flavours share one loop body. Iterating by index with a
    // signed bound means a negative bufSize simply executes zero iterations
    // instead of running a pointer past the buffer.
#ifdef ENABLE_CILKPLUS
    cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
#else  // ENABLE_CILKPLUS
    for (int ptr = 0; ptr < bufSize; ptr ++)
#endif  // ENABLE_CILKPLUS
    {
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
        // Keep only the three colour bytes of the pixel for the comparison.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
        const unsigned int data = pixels[ptr] & 0x00ffffff;
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

        const unsigned int data = pixels[ptr] & 0xffffff00;
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
        while (it != it_end)
        {
            const DyeColor &col = *it;
            ++ it;
            const DyeColor &col2 = *it;

            // Pack the source colour in the same layout as `data`. The
            // components are converted to unsigned before shifting so the
            // shift by 24 never moves a bit into the sign bit of an int.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int coldata =
                (static_cast<unsigned int>(col.value[2]) << 16U)
                | (static_cast<unsigned int>(col.value[1]) << 8U)
                | static_cast<unsigned int>(col.value[0]);
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            const unsigned int coldata =
                (static_cast<unsigned int>(col.value[2]) << 8U)
                | (static_cast<unsigned int>(col.value[1]) << 16U)
                | (static_cast<unsigned int>(col.value[0]) << 24U);
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            if (data == coldata)
            {
                // First matching pair wins; p[0] is left as it was.
                p[3] = col2.value[0];
                p[2] = col2.value[1];
                p[1] = col2.value[2];
                break;
            }

            ++ it;
        }
    }
}
128
129
#ifdef SIMD_SUPPORTED
130
/*
131
static void print256(const char *const text, const __m256i &val);
132
static void print256(const char *const text, const __m256i &val)
133
{
134
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
135
}
136
*/
137
138
__attribute__ ((target ("sse2")))
139
22
void DyePalette::replaceSColorSse2(uint32_t *restrict pixels,
140
                                   const int bufSize) const restrict2
141
{
142
44
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
143
44
    const size_t sz = mColors.size();
144
22
    if (sz == 0u || pixels == nullptr)
145
        return;
146
22
    if ((sz % 2) != 0u)
147
        -- it_end;
148
22
    const int mod = bufSize % 8;
149
22
    const int bufEnd = bufSize - mod;
150
151
34
    for (int ptr = 0; ptr < bufEnd; ptr += 4)
152
    {
153
12
        __m128i mask = _mm_set1_epi32(0xffffff00);
154
//        __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(pixels));
155
        __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
156
24
            &pixels[ptr]));
157
158
24
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
159
36
        while (it != it_end)
160
        {
161
24
            const DyeColor &col = *it;
162
24
            ++ it;
163
24
            const DyeColor &col2 = *it;
164
165
24
            __m128i base2 = _mm_and_si128(mask, base);
166
48
            __m128i newMask = _mm_set1_epi32(col2.valueS);
167
48
            __m128i cmpMask = _mm_set1_epi32(col.valueS);
168
24
            __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
169
24
            cmpRes = _mm_and_si128(mask, cmpRes);
170
24
            __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
171
24
            __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
172
24
            base = _mm_or_si128(srcAnd, dstAnd);
173
            ++ it;
174
        }
175
//        _mm_store_si128(reinterpret_cast<__m128i*>(pixels), base);
176
24
        _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
177
    }
178
179
    // complete end without simd
180
130
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
181
    {
182
54
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
183
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
184
        const unsigned int data = pixels[ptr] & 0x00ffffff;
185
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
186
187
54
        const unsigned int data = pixels[ptr] & 0xffffff00;
188
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
189
190
108
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
191
112
        while (it != it_end)
192
        {
193
102
            const DyeColor &col = *it;
194
102
            ++ it;
195
102
            const DyeColor &col2 = *it;
196
197
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
198
            const unsigned int coldata = (col.value[2] << 16U)
199
                | (col.value[1] << 8U) | (col.value[0]);
200
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
201
202
204
            const unsigned int coldata = (col.value[2] << 8U)
203
102
                | (col.value[1] << 16U) | (col.value[0] << 24U);
204
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
205
206
102
            if (data == coldata)
207
            {
208
44
                p[3] = col2.value[0];
209
44
                p[2] = col2.value[1];
210
44
                p[1] = col2.value[2];
211
44
                break;
212
            }
213
214
            ++ it;
215
        }
216
    }
217
}
218
219
__attribute__ ((target ("avx2")))
220
50
void DyePalette::replaceSColorAvx2(uint32_t *restrict pixels,
221
                                   const int bufSize) const restrict2
222
{
223
100
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
224
100
    const size_t sz = mColors.size();
225
50
    if (sz == 0u || pixels == nullptr)
226
        return;
227
50
    if ((sz % 2) != 0u)
228
        -- it_end;
229
50
    const int mod = bufSize % 8;
230
50
    const int bufEnd = bufSize - mod;
231
232
830
    for (int ptr = 0; ptr < bufEnd; ptr += 8)
233
    {
234
780
        __m256i mask = _mm256_set1_epi32(0xffffff00);
235
//        __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
236
        __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
237
1560
            &pixels[ptr]));
238
239
1560
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
240
4132
        while (it != it_end)
241
        {
242
3352
            const DyeColor &col = *it;
243
3352
            ++ it;
244
3352
            const DyeColor &col2 = *it;
245
246
3352
            __m256i base2 = _mm256_and_si256(mask, base);
247
6704
            __m256i newMask = _mm256_set1_epi32(col2.valueS);
248
6704
            __m256i cmpMask = _mm256_set1_epi32(col.valueS);
249
3352
            __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
250
3352
            cmpRes = _mm256_and_si256(mask, cmpRes);
251
3352
            __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
252
3352
            __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
253
3352
            base = _mm256_or_si256(srcAnd, dstAnd);
254
            ++ it;
255
        }
256
//        _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
257
1560
        _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
258
    }
259
260
    // complete end without simd
261
266
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
262
    {
263
108
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
264
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
265
        const unsigned int data = pixels[ptr] & 0x00ffffff;
266
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
267
268
108
        const unsigned int data = pixels[ptr] & 0xffffff00;
269
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
270
271
216
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
272
224
        while (it != it_end)
273
        {
274
204
            const DyeColor &col = *it;
275
204
            ++ it;
276
204
            const DyeColor &col2 = *it;
277
278
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
279
            const unsigned int coldata = (col.value[2] << 16U)
280
                | (col.value[1] << 8U) | (col.value[0]);
281
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
282
283
408
            const unsigned int coldata = (col.value[2] << 8U)
284
204
                | (col.value[1] << 16U) | (col.value[0] << 24U);
285
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
286
287
204
            if (data == coldata)
288
            {
289
88
                p[3] = col2.value[0];
290
88
                p[2] = col2.value[1];
291
88
                p[1] = col2.value[2];
292
88
                break;
293
            }
294
295
            ++ it;
296
        }
297
    }
298
}
299
300
#endif  // SIMD_SUPPORTED