GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replaceaoglcolor.cpp Lines: 83 102 81.4 %
Date: 2017-11-29 Branches: 29 42 69.0 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2017  The ManaPlus Developers
6
 *
7
 *  This file is part of The ManaPlus Client.
8
 *
9
 *  This program is free software; you can redistribute it and/or modify
10
 *  it under the terms of the GNU General Public License as published by
11
 *  the Free Software Foundation; either version 2 of the License, or
12
 *  any later version.
13
 *
14
 *  This program is distributed in the hope that it will be useful,
15
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 *  GNU General Public License for more details.
18
 *
19
 *  You should have received a copy of the GNU General Public License
20
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#ifdef USE_OPENGL
24
25
#include "resources/dye/dyepalette.h"
26
27
PRAGMA48(GCC diagnostic push)
28
PRAGMA48(GCC diagnostic ignored "-Wshadow")
29
#ifndef SDL_BIG_ENDIAN
30
#include <SDL_endian.h>
31
#endif  // SDL_BIG_ENDIAN
32
PRAGMA48(GCC diagnostic pop)
33
34
#ifdef SIMD_SUPPORTED
35
// avx2
36
#include <immintrin.h>
37
#endif  // SIMD_SUPPORTED
38
39
#include "debug.h"
40
41
12
/**
 * Portable (scalar) colour replacement on a buffer of 32-bit pixels.
 *
 * mColors is read as consecutive (source, destination) pairs; when the
 * palette holds an odd number of entries the trailing unpaired one is
 * ignored. Each pixel is compared with the source colour of every pair in
 * order; on the first match its four bytes are overwritten with the
 * destination colour and the remaining pairs are skipped for that pixel.
 *
 * @param pixels  pixel buffer, modified in place. Nothing is done when it
 *                is nullptr.
 * @param bufSize number of pixels in the buffer. Zero or negative values
 *                leave the buffer untouched.
 */
void DyePalette::replaceAOGLColorDefault(uint32_t *restrict pixels,
                                         const int bufSize) const restrict2
{
    const size_t sz = mColors.size();
    // A negative size must be rejected here: converting it to an unsigned
    // offset would otherwise yield an end position that is never reached.
    if (sz == 0u || pixels == nullptr || bufSize <= 0)
        return;

    // Drop the last entry of an odd-sized palette so that the pair walk
    // below never dereferences end().
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
    if ((sz % 2) != 0u)
        -- it_end;

#ifdef ENABLE_CILKPLUS
    cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
#else  // ENABLE_CILKPLUS
    for (int ptr = 0; ptr < bufSize; ptr ++)
#endif  // ENABLE_CILKPLUS
    {
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
        const unsigned int data = pixels[ptr];

        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
        while (it != it_end)
        {
            const DyeColor &col = *it;
            ++ it;
            const DyeColor &col2 = *it;
            ++ it;

            // Pack the source colour so that its in-memory byte order is
            // value[0], value[1], value[2], value[3] on either endianness.
            // Channels are widened to unsigned before shifting; a narrow
            // channel type would otherwise be promoted to int and a value
            // >= 0x80 shifted by 24 would land in the sign bit.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int coldata =
                (static_cast<unsigned int>(col.value[0]) << 24U)
                | (static_cast<unsigned int>(col.value[1]) << 16U)
                | (static_cast<unsigned int>(col.value[2]) << 8U)
                | static_cast<unsigned int>(col.value[3]);
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            const unsigned int coldata =
                static_cast<unsigned int>(col.value[0])
                | (static_cast<unsigned int>(col.value[1]) << 8U)
                | (static_cast<unsigned int>(col.value[2]) << 16U)
                | (static_cast<unsigned int>(col.value[3]) << 24U);
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            if (data == coldata)
            {
                p[0] = col2.value[0];
                p[1] = col2.value[1];
                p[2] = col2.value[2];
                p[3] = col2.value[3];
                break;  // first matching pair wins
            }
        }
    }
}
133
134
#ifdef SIMD_SUPPORTED
135
/*
136
static void print256(const char *const text, const __m256i &val);
137
static void print256(const char *const text, const __m256i &val)
138
{
139
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
140
}
141
*/
142
143
__attribute__ ((target ("sse2")))
144
2
void DyePalette::replaceAOGLColorSse2(uint32_t *restrict pixels,
145
                                      const int bufSize) const restrict2
146
{
147
4
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
148
4
    const size_t sz = mColors.size();
149
2
    if (sz == 0u || pixels == nullptr)
150
        return;
151
2
    if ((sz % 2) != 0u)
152
        -- it_end;
153
154
2
    if (bufSize >= 8)
155
    {
156
10
        for (int ptr = 0; ptr < bufSize; ptr += 4)
157
        {
158
//            __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(
159
//                &pixels[ptr]));
160
            __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
161
8
                &pixels[ptr]));
162
163
8
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
164
12
            while (it != it_end)
165
            {
166
8
                const DyeColor &col = *it;
167
8
                ++ it;
168
8
                const DyeColor &col2 = *it;
169
170
16
                __m128i newMask = _mm_set1_epi32(col2.valueAOgl);
171
16
                __m128i cmpMask = _mm_set1_epi32(col.valueAOgl);
172
8
                __m128i cmpRes = _mm_cmpeq_epi32(base, cmpMask);
173
8
                __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
174
8
                __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
175
8
                base = _mm_or_si128(srcAnd, dstAnd);
176
177
                ++ it;
178
            }
179
//            _mm_store_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
180
8
            _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
181
        }
182
    }
183
    else
184
    {
185
#ifdef ENABLE_CILKPLUS
186
        cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
187
        {
188
            uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
189
            const unsigned int data = pixels[ptr];
190
191
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
192
            while (it != it_end)
193
            {
194
                const DyeColor &col = *it;
195
                ++ it;
196
                const DyeColor &col2 = *it;
197
198
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
199
                const unsigned int coldata = (col.value[0] << 24U)
200
                    | (col.value[1] << 16U)
201
                    | (col.value[2] << 8U)
202
                    | col.value[3];
203
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
204
205
                const unsigned int coldata = (col.value[0])
206
                    | (col.value[1] << 8U)
207
                    | (col.value[2] << 16U)
208
                    | (col.value[3] << 24U);
209
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
210
211
                if (data == coldata)
212
                {
213
                    p[0] = col2.value[0];
214
                    p[1] = col2.value[1];
215
                    p[2] = col2.value[2];
216
                    p[3] = col2.value[3];
217
                    break;
218
                }
219
220
                ++ it;
221
            }
222
        }
223
224
#else  // ENABLE_CILKPLUS
225
226
        for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
227
             pixels != p_end;
228
             ++pixels)
229
        {
230
            uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
231
            const unsigned int data = *pixels;
232
233
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
234
            while (it != it_end)
235
            {
236
                const DyeColor &col = *it;
237
                ++ it;
238
                const DyeColor &col2 = *it;
239
240
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
241
                const unsigned int coldata = (col.value[0] << 24U)
242
                    | (col.value[1] << 16U)
243
                    | (col.value[2] << 8U)
244
                    | col.value[3];
245
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
246
247
                const unsigned int coldata = (col.value[0])
248
                    | (col.value[1] << 8U)
249
                    | (col.value[2] << 16U)
250
                    | (col.value[3] << 24U);
251
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
252
253
                if (data == coldata)
254
                {
255
                    p[0] = col2.value[0];
256
                    p[1] = col2.value[1];
257
                    p[2] = col2.value[2];
258
                    p[3] = col2.value[3];
259
                    break;
260
                }
261
262
                ++ it;
263
            }
264
        }
265
#endif  // ENABLE_CILKPLUS
266
    }
267
}
268
269
__attribute__ ((target ("avx2")))
270
14
void DyePalette::replaceAOGLColorAvx2(uint32_t *restrict pixels,
271
                                      const int bufSize) const restrict2
272
{
273
28
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
274
28
    const size_t sz = mColors.size();
275
14
    if (sz == 0u || pixels == nullptr)
276
        return;
277
14
    if ((sz % 2) != 0u)
278
        -- it_end;
279
280
14
    if (bufSize >= 8)
281
    {
282
12
        for (int ptr = 0; ptr < bufSize; ptr += 8)
283
        {
284
//            __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(
285
//                &pixels[ptr]));
286
            __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
287
8
                &pixels[ptr]));
288
289
8
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
290
12
            while (it != it_end)
291
            {
292
8
                const DyeColor &col = *it;
293
8
                ++ it;
294
8
                const DyeColor &col2 = *it;
295
296
16
                __m256i newMask = _mm256_set1_epi32(col2.valueAOgl);
297
16
                __m256i cmpMask = _mm256_set1_epi32(col.valueAOgl);
298
8
                __m256i cmpRes = _mm256_cmpeq_epi32(base, cmpMask);
299
8
                __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
300
8
                __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
301
8
                base = _mm256_or_si256(srcAnd, dstAnd);
302
303
                ++ it;
304
            }
305
//            _mm256_store_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
306
//                base);
307
8
            _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
308
                base);
309
        }
310
    }
311
    else
312
    {
313
#ifdef ENABLE_CILKPLUS
314
        cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
315
        {
316
            uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
317
            const unsigned int data = pixels[ptr];
318
319
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
320
            while (it != it_end)
321
            {
322
                const DyeColor &col = *it;
323
                ++ it;
324
                const DyeColor &col2 = *it;
325
326
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
327
                const unsigned int coldata = (col.value[0] << 24U)
328
                    | (col.value[1] << 16U)
329
                    | (col.value[2] << 8U)
330
                    | col.value[3];
331
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
332
333
                const unsigned int coldata = (col.value[0])
334
                    | (col.value[1] << 8U)
335
                    | (col.value[2] << 16U)
336
                    | (col.value[3] << 24U);
337
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
338
339
                if (data == coldata)
340
                {
341
                    p[0] = col2.value[0];
342
                    p[1] = col2.value[1];
343
                    p[2] = col2.value[2];
344
                    p[3] = col2.value[3];
345
                    break;
346
                }
347
348
                ++ it;
349
            }
350
        }
351
352
#else  // ENABLE_CILKPLUS
353
354
28
        for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
355
28
             pixels != p_end;
356
             ++pixels)
357
        {
358
18
            uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
359
18
            const unsigned int data = *pixels;
360
361
36
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
362
36
            while (it != it_end)
363
            {
364
32
                const DyeColor &col = *it;
365
32
                ++ it;
366
32
                const DyeColor &col2 = *it;
367
368
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
369
                const unsigned int coldata = (col.value[0] << 24U)
370
                    | (col.value[1] << 16U)
371
                    | (col.value[2] << 8U)
372
                    | col.value[3];
373
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
374
375
64
                const unsigned int coldata = (col.value[0])
376
32
                    | (col.value[1] << 8U)
377
32
                    | (col.value[2] << 16U)
378
32
                    | (col.value[3] << 24U);
379
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
380
381
32
                if (data == coldata)
382
                {
383
14
                    p[0] = col2.value[0];
384
14
                    p[1] = col2.value[1];
385
14
                    p[2] = col2.value[2];
386
14
                    p[3] = col2.value[3];
387
14
                    break;
388
                }
389
390
                ++ it;
391
            }
392
        }
393
#endif  // ENABLE_CILKPLUS
394
    }
395
}
396
397
#endif   // SIMD_SUPPORTED
398
#endif  // USE_OPENGL