GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replaceacolor.cpp Lines: 102 102 100.0 %
Date: 2017-11-29 Branches: 32 38 84.2 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2017  The ManaPlus Developers
6
 *
7
 *  This file is part of The ManaPlus Client.
8
 *
9
 *  This program is free software; you can redistribute it and/or modify
10
 *  it under the terms of the GNU General Public License as published by
11
 *  the Free Software Foundation; either version 2 of the License, or
12
 *  any later version.
13
 *
14
 *  This program is distributed in the hope that it will be useful,
15
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 *  GNU General Public License for more details.
18
 *
19
 *  You should have received a copy of the GNU General Public License
20
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#include "resources/dye/dyepalette.h"
24
25
PRAGMA48(GCC diagnostic push)
26
PRAGMA48(GCC diagnostic ignored "-Wshadow")
27
#ifndef SDL_BIG_ENDIAN
28
#include <SDL_endian.h>
29
#endif  // SDL_BIG_ENDIAN
30
PRAGMA48(GCC diagnostic pop)
31
32
#ifdef SIMD_SUPPORTED
33
// avx2
34
#include <immintrin.h>
35
#endif  // SIMD_SUPPORTED
36
37
#include "debug.h"
38
39
22
void DyePalette::replaceAColorDefault(uint32_t *restrict pixels,
40
                                      const int bufSize) const restrict2
41
{
42
44
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
43
44
    const size_t sz = mColors.size();
44
22
    if ((sz == 0u) || (pixels == nullptr))
45
        return;
46
22
    if ((sz % 2) != 0u)
47
        -- it_end;
48
49
#ifdef ENABLE_CILKPLUS
50
    cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
51
    {
52
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
53
        const unsigned int data = pixels[ptr];
54
55
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
56
        while (it != it_end)
57
        {
58
            const DyeColor &col = *it;
59
            ++ it;
60
            const DyeColor &col2 = *it;
61
62
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
63
            const unsigned int coldata = (col.value[3] << 24U)
64
                | (col.value[2] << 16U)
65
                | (col.value[1] << 8U)
66
                | (col.value[0]);
67
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
68
69
            const unsigned int coldata = (col.value[3])
70
                | (col.value[2] << 8U)
71
                | (col.value[1] << 16U) |
72
                (col.value[0] << 24U);
73
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
74
75
            if (data == coldata)
76
            {
77
                p[3] = col2.value[0];
78
                p[2] = col2.value[1];
79
                p[1] = col2.value[2];
80
                p[0] = col2.value[3];
81
                break;
82
            }
83
84
            ++ it;
85
        }
86
    }
87
88
#else  // ENABLE_CILKPLUS
89
90
124
    for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
91
124
         pixels != p_end;
92
         ++pixels)
93
    {
94
102
        uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
95
102
        const unsigned int data = *pixels;
96
97
204
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
98
204
        while (it != it_end)
99
        {
100
166
            const DyeColor &col = *it;
101
166
            ++ it;
102
166
            const DyeColor &col2 = *it;
103
104
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
105
            const unsigned int coldata = (col.value[3] << 24U)
106
                | (col.value[2] << 16U)
107
                | (col.value[1] << 8U)
108
                | (col.value[0]);
109
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
110
332
            const unsigned int coldata = (col.value[3])
111
166
                | (col.value[2] << 8U)
112
332
                | (col.value[1] << 16U) |
113
166
                (col.value[0] << 24U);
114
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
115
116
166
            if (data == coldata)
117
            {
118
64
                p[3] = col2.value[0];
119
64
                p[2] = col2.value[1];
120
64
                p[1] = col2.value[2];
121
64
                p[0] = col2.value[3];
122
64
                break;
123
            }
124
125
            ++ it;
126
        }
127
    }
128
#endif  // ENABLE_CILKPLUS
129
}
130
131
#ifdef SIMD_SUPPORTED
132
/*
133
static void print256(const char *const text, const __m256i &val);
134
static void print256(const char *const text, const __m256i &val)
135
{
136
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
137
}
138
*/
139
140
__attribute__ ((target ("sse2")))
141
22
void DyePalette::replaceAColorSse2(uint32_t *restrict pixels,
142
                                   const int bufSize) const restrict2
143
{
144
44
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
145
44
    const size_t sz = mColors.size();
146
22
    if ((sz == 0u) || (pixels == nullptr))
147
        return;
148
22
    if ((sz % 2) != 0u)
149
        -- it_end;
150
22
    const int mod = bufSize % 4;
151
22
    const int bufEnd = bufSize - mod;
152
153
40
    for (int ptr = 0; ptr < bufEnd; ptr += 4)
154
    {
155
//        __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(pixels));
156
        __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
157
36
            &pixels[ptr]));
158
159
36
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
160
54
        while (it != it_end)
161
        {
162
36
            const DyeColor &col = *it;
163
36
            ++ it;
164
36
            const DyeColor &col2 = *it;
165
166
72
            __m128i newMask = _mm_set1_epi32(col2.valueA);
167
72
            __m128i cmpMask = _mm_set1_epi32(col.valueA);
168
36
            __m128i cmpRes = _mm_cmpeq_epi32(base, cmpMask);
169
36
            __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
170
36
            __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
171
36
            base = _mm_or_si128(srcAnd, dstAnd);
172
173
            ++ it;
174
        }
175
//        _mm_store_si128(reinterpret_cast<__m128i*>(pixels), base);
176
36
        _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
177
    }
178
179
    // complete end without simd
180
82
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
181
    {
182
30
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
183
30
        const unsigned int data = pixels[ptr];
184
185
60
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
186
54
        while (it != it_end)
187
        {
188
46
            const DyeColor &col = *it;
189
46
            ++ it;
190
46
            const DyeColor &col2 = *it;
191
192
138
            const unsigned int coldata = (col.value[3]) |
193
92
                (col.value[2] << 8U) |
194
92
                (col.value[1] << 16U) |
195
46
                (col.value[0] << 24U);
196
197
46
            if (data == coldata)
198
            {
199
22
                p[3] = col2.value[0];
200
22
                p[2] = col2.value[1];
201
22
                p[1] = col2.value[2];
202
22
                p[0] = col2.value[3];
203
22
                break;
204
            }
205
206
            ++ it;
207
        }
208
    }
209
}
210
211
__attribute__ ((target ("avx2")))
212
46
void DyePalette::replaceAColorAvx2(uint32_t *restrict pixels,
213
                                   const int bufSize) const restrict2
214
{
215
92
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
216
92
    const size_t sz = mColors.size();
217
46
    if ((sz == 0u) || (pixels == nullptr))
218
        return;
219
46
    if ((sz % 2) != 0u)
220
        -- it_end;
221
46
    const int mod = bufSize % 8;
222
46
    const int bufEnd = bufSize - mod;
223
224
314
    for (int ptr = 0; ptr < bufEnd; ptr += 8)
225
    {
226
//        __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
227
        __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
228
536
            &pixels[ptr]));
229
230
536
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
231
548
        while (it != it_end)
232
        {
233
280
            const DyeColor &col = *it;
234
280
            ++ it;
235
280
            const DyeColor &col2 = *it;
236
237
560
            __m256i newMask = _mm256_set1_epi32(col2.valueA);
238
560
            __m256i cmpMask = _mm256_set1_epi32(col.valueA);
239
280
            __m256i cmpRes = _mm256_cmpeq_epi32(base, cmpMask);
240
280
            __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
241
280
            __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
242
280
            base = _mm256_or_si256(srcAnd, dstAnd);
243
244
            ++ it;
245
        }
246
//        _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
247
536
        _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
248
    }
249
250
    // complete end without simd
251
262
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
252
    {
253
108
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
254
108
        const unsigned int data = pixels[ptr];
255
256
216
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
257
200
        while (it != it_end)
258
        {
259
168
            const DyeColor &col = *it;
260
168
            ++ it;
261
168
            const DyeColor &col2 = *it;
262
263
504
            const unsigned int coldata = (col.value[3]) |
264
336
                (col.value[2] << 8U) |
265
336
                (col.value[1] << 16U) |
266
168
                (col.value[0] << 24U);
267
268
168
            if (data == coldata)
269
            {
270
76
                p[3] = col2.value[0];
271
76
                p[2] = col2.value[1];
272
76
                p[1] = col2.value[2];
273
76
                p[0] = col2.value[3];
274
76
                break;
275
            }
276
277
            ++ it;
278
        }
279
    }
280
}
281
282
#endif  // SIMD_SUPPORTED