GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replacesoglcolor.cpp Lines: 83 99 83.8 %
Date: 2017-11-29 Branches: 29 42 69.0 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2017  The ManaPlus Developers
6
 *
7
 *  This file is part of The ManaPlus Client.
8
 *
9
 *  This program is free software; you can redistribute it and/or modify
10
 *  it under the terms of the GNU General Public License as published by
11
 *  the Free Software Foundation; either version 2 of the License, or
12
 *  any later version.
13
 *
14
 *  This program is distributed in the hope that it will be useful,
15
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 *  GNU General Public License for more details.
18
 *
19
 *  You should have received a copy of the GNU General Public License
20
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#ifdef USE_OPENGL
24
25
#include "resources/dye/dyepalette.h"
26
27
PRAGMA48(GCC diagnostic push)
28
PRAGMA48(GCC diagnostic ignored "-Wshadow")
29
#ifndef SDL_BIG_ENDIAN
30
#include <SDL_endian.h>
31
#endif  // SDL_BIG_ENDIAN
32
PRAGMA48(GCC diagnostic pop)
33
34
#ifdef SIMD_SUPPORTED
35
// avx2
36
#include <immintrin.h>
37
#endif  // SIMD_SUPPORTED
38
39
#include "debug.h"
40
41
12
/**
 * Recolors a buffer of 32-bit pixels in place (scalar implementation).
 *
 * mColors is read as consecutive (source, replacement) pairs; a trailing
 * entry without a partner is ignored. For each pixel, the three colour
 * bytes selected by the endianness-dependent mask are compared with every
 * source colour in order. On the first match the first three bytes of the
 * pixel are overwritten with value[0..2] of the replacement and the search
 * for that pixel stops. The fourth byte of the pixel is never written.
 *
 * @param pixels   buffer to recolor; nothing is done when it is null.
 * @param bufSize  number of 32-bit pixels in the buffer.
 */
void DyePalette::replaceSOGLColorDefault(uint32_t *restrict pixels,
                                         const int bufSize) const restrict2
{
    const size_t colorCount = mColors.size();
    if (pixels == nullptr || colorCount == 0u)
        return;

    // Only complete (source, replacement) pairs take part in the search.
    const size_t pairedCount = colorCount - (colorCount % 2);

    // Mask and shifts that line up value[0..2] with the first three bytes
    // of a pixel in memory for the current byte order.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
    const unsigned int colorMask = 0xffffff00;
    const int shift0 = 24;
    const int shift1 = 16;
    const int shift2 = 8;
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

    const unsigned int colorMask = 0x00ffffff;
    const int shift0 = 0;
    const int shift1 = 8;
    const int shift2 = 16;
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

#ifdef ENABLE_CILKPLUS
    cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
    {
        uint8_t *const bytes = reinterpret_cast<uint8_t *>(&pixels[ptr]);
        const unsigned int pixelColor = (pixels[ptr]) & colorMask;

        for (size_t idx = 0; idx < pairedCount; idx += 2)
        {
            const DyeColor &from = mColors[idx];
            const unsigned int fromColor = (from.value[0] << shift0)
                | (from.value[1] << shift1) | (from.value[2] << shift2);
            if (pixelColor != fromColor)
                continue;

            // First matching pair wins.
            const DyeColor &to = mColors[idx + 1];
            bytes[0] = to.value[0];
            bytes[1] = to.value[1];
            bytes[2] = to.value[2];
            break;
        }
    }

#else  // ENABLE_CILKPLUS

    const uint32_t *const bufEnd = pixels + CAST_SIZE(bufSize);
    for (; pixels != bufEnd; ++pixels)
    {
        uint8_t *const bytes = reinterpret_cast<uint8_t *>(pixels);
        const unsigned int pixelColor = (*pixels) & colorMask;

        for (size_t idx = 0; idx < pairedCount; idx += 2)
        {
            const DyeColor &from = mColors[idx];
            const unsigned int fromColor = (from.value[0] << shift0)
                | (from.value[1] << shift1) | (from.value[2] << shift2);
            if (pixelColor != fromColor)
                continue;

            // First matching pair wins.
            const DyeColor &to = mColors[idx + 1];
            bytes[0] = to.value[0];
            bytes[1] = to.value[1];
            bytes[2] = to.value[2];
            break;
        }
    }
#endif  // ENABLE_CILKPLUS
}
133
134
#ifdef SIMD_SUPPORTED
135
/*
136
static void print256(const char *const text, const __m256i &val);
137
static void print256(const char *const text, const __m256i &val)
138
{
139
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
140
}
141
*/
142
143
__attribute__ ((target ("sse2")))
144
2
void DyePalette::replaceSOGLColorSse2(uint32_t *restrict pixels,
145
                                      const int bufSize) const restrict2
146
{
147
4
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
148
4
    const size_t sz = mColors.size();
149
2
    if ((sz == 0u) || (pixels == nullptr))
150
        return;
151
2
    if ((sz % 2) != 0u)
152
        -- it_end;
153
154
2
    if (bufSize >= 8)
155
    {
156
10
        for (int ptr = 0; ptr < bufSize; ptr += 4)
157
        {
158
4
            __m128i mask = _mm_set1_epi32(0x00ffffff);
159
//            __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(
160
//             &pixels[ptr]));
161
            __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
162
8
                &pixels[ptr]));
163
164
8
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
165
12
            while (it != it_end)
166
            {
167
8
                const DyeColor &col = *it;
168
8
                ++ it;
169
8
                const DyeColor &col2 = *it;
170
171
8
                __m128i base2 = _mm_and_si128(mask, base);
172
16
                __m128i newMask = _mm_set1_epi32(col2.valueSOgl);
173
16
                __m128i cmpMask = _mm_set1_epi32(col.valueSOgl);
174
8
                __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
175
8
                cmpRes = _mm_and_si128(mask, cmpRes);
176
8
                __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
177
8
                __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
178
8
                base = _mm_or_si128(srcAnd, dstAnd);
179
                ++ it;
180
            }
181
//            _mm_store_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
182
8
            _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
183
        }
184
    }
185
    else
186
    {
187
#ifdef ENABLE_CILKPLUS
188
        cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
189
        {
190
            uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
191
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
192
            const unsigned int data = (pixels[ptr]) & 0xffffff00;
193
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
194
195
            const unsigned int data = (pixels[ptr]) & 0x00ffffff;
196
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
197
198
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
199
            while (it != it_end)
200
            {
201
                const DyeColor &col = *it;
202
                ++ it;
203
                const DyeColor &col2 = *it;
204
205
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
206
                const unsigned int coldata = (col.value[0] << 24)
207
                    | (col.value[1] << 16) | (col.value[2] << 8);
208
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
209
210
                const unsigned int coldata = (col.value[0])
211
                    | (col.value[1] << 8) | (col.value[2] << 16);
212
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
213
214
                if (data == coldata)
215
                {
216
                    p[0] = col2.value[0];
217
                    p[1] = col2.value[1];
218
                    p[2] = col2.value[2];
219
                    break;
220
                }
221
222
                ++ it;
223
            }
224
        }
225
226
#else  // ENABLE_CILKPLUS
227
228
        for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
229
             pixels != p_end;
230
             ++pixels)
231
        {
232
            uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
233
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
234
            const unsigned int data = (*pixels) & 0xffffff00;
235
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
236
237
            const unsigned int data = (*pixels) & 0x00ffffff;
238
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
239
240
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
241
            while (it != it_end)
242
            {
243
                const DyeColor &col = *it;
244
                ++ it;
245
                const DyeColor &col2 = *it;
246
247
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
248
                const unsigned int coldata = (col.value[0] << 24)
249
                    | (col.value[1] << 16) | (col.value[2] << 8);
250
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
251
252
                const unsigned int coldata = (col.value[0])
253
                    | (col.value[1] << 8) | (col.value[2] << 16);
254
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
255
256
                if (data == coldata)
257
                {
258
                    p[0] = col2.value[0];
259
                    p[1] = col2.value[1];
260
                    p[2] = col2.value[2];
261
                    break;
262
                }
263
264
                ++ it;
265
            }
266
        }
267
#endif  // ENABLE_CILKPLUS
268
    }
269
}
270
271
__attribute__ ((target ("avx2")))
272
14
void DyePalette::replaceSOGLColorAvx2(uint32_t *restrict pixels,
273
                                      const int bufSize) const restrict2
274
{
275
28
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
276
28
    const size_t sz = mColors.size();
277
14
    if ((sz == 0u) || (pixels == nullptr))
278
        return;
279
14
    if ((sz % 2) != 0u)
280
        -- it_end;
281
282
14
    if (bufSize >= 8)
283
    {
284
12
        for (int ptr = 0; ptr < bufSize; ptr += 8)
285
        {
286
4
            __m256i mask = _mm256_set1_epi32(0x00ffffff);
287
//          __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(
288
//              &pixels[ptr]));
289
            __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
290
8
                &pixels[ptr]));
291
292
8
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
293
12
            while (it != it_end)
294
            {
295
8
                const DyeColor &col = *it;
296
8
                ++ it;
297
8
                const DyeColor &col2 = *it;
298
299
8
                __m256i base2 = _mm256_and_si256(mask, base);
300
16
                __m256i newMask = _mm256_set1_epi32(col2.valueSOgl);
301
16
                __m256i cmpMask = _mm256_set1_epi32(col.valueSOgl);
302
8
                __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
303
8
                cmpRes = _mm256_and_si256(mask, cmpRes);
304
8
                __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
305
8
                __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
306
8
                base = _mm256_or_si256(srcAnd, dstAnd);
307
                ++ it;
308
            }
309
//            _mm256_store_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
310
//                base);
311
8
            _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
312
                base);
313
        }
314
    }
315
    else
316
    {
317
#ifdef ENABLE_CILKPLUS
318
        cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
319
        {
320
            uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
321
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
322
            const unsigned int data = (pixels[ptr]) & 0xffffff00;
323
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
324
325
            const unsigned int data = (pixels[ptr]) & 0x00ffffff;
326
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
327
328
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
329
            while (it != it_end)
330
            {
331
                const DyeColor &col = *it;
332
                ++ it;
333
                const DyeColor &col2 = *it;
334
335
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
336
                const unsigned int coldata = (col.value[0] << 24)
337
                    | (col.value[1] << 16) | (col.value[2] << 8);
338
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
339
340
                const unsigned int coldata = (col.value[0])
341
                    | (col.value[1] << 8) | (col.value[2] << 16);
342
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
343
344
                if (data == coldata)
345
                {
346
                    p[0] = col2.value[0];
347
                    p[1] = col2.value[1];
348
                    p[2] = col2.value[2];
349
                    break;
350
                }
351
352
                ++ it;
353
            }
354
        }
355
356
#else  // ENABLE_CILKPLUS
357
358
28
        for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
359
28
             pixels != p_end;
360
             ++pixels)
361
        {
362
18
            uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
363
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
364
            const unsigned int data = (*pixels) & 0xffffff00;
365
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
366
367
18
            const unsigned int data = (*pixels) & 0x00ffffff;
368
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
369
370
36
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
371
24
            while (it != it_end)
372
            {
373
20
                const DyeColor &col = *it;
374
20
                ++ it;
375
20
                const DyeColor &col2 = *it;
376
377
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
378
                const unsigned int coldata = (col.value[0] << 24)
379
                    | (col.value[1] << 16) | (col.value[2] << 8);
380
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
381
382
40
                const unsigned int coldata = (col.value[0])
383
20
                    | (col.value[1] << 8) | (col.value[2] << 16);
384
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
385
386
20
                if (data == coldata)
387
                {
388
14
                    p[0] = col2.value[0];
389
14
                    p[1] = col2.value[1];
390
14
                    p[2] = col2.value[2];
391
14
                    break;
392
                }
393
394
                ++ it;
395
            }
396
        }
397
#endif  // ENABLE_CILKPLUS
398
    }
399
}
400
401
#endif  // SIMD_SUPPORTED
402
#endif  // USE_OPENGL