GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replacescolor.cpp Lines: 99 99 100.0 %
Date: 2021-03-17 Branches: 32 38 84.2 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2019  The ManaPlus Developers
6
 *  Copyright (C) 2019-2021  Andrei Karas
7
 *
8
 *  This file is part of The ManaPlus Client.
9
 *
10
 *  This program is free software; you can redistribute it and/or modify
11
 *  it under the terms of the GNU General Public License as published by
12
 *  the Free Software Foundation; either version 2 of the License, or
13
 *  any later version.
14
 *
15
 *  This program is distributed in the hope that it will be useful,
16
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 *  GNU General Public License for more details.
19
 *
20
 *  You should have received a copy of the GNU General Public License
21
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 */
23
24
#include "resources/dye/dyepalette.h"
25
26
PRAGMA48(GCC diagnostic push)
27
PRAGMA48(GCC diagnostic ignored "-Wshadow")
28
#ifndef SDL_BIG_ENDIAN
29
#include <SDL_endian.h>
30
#endif  // SDL_BIG_ENDIAN
31
PRAGMA48(GCC diagnostic pop)
32
33
#ifdef SIMD_SUPPORTED
34
// avx2
35
#include <immintrin.h>
36
#endif  // SIMD_SUPPORTED
37
38
#include "debug.h"
39
40
11
/**
 * Scalar implementation of the "S" palette color replacement.
 *
 * mColors is read as consecutive (source, replacement) pairs; when the
 * number of entries is odd the last, unpaired entry is ignored. For each
 * pixel the three color bytes are compared with every source color in
 * order. On the first match bytes p[1]..p[3] of the pixel are overwritten
 * with the paired replacement color and the search for that pixel stops;
 * byte p[0] is never written.
 *
 * @param pixels  buffer of packed 32 bit pixels, modified in place.
 *                Nothing is done when it is nullptr.
 * @param bufSize number of pixels in the buffer. Values <= 0 are a no-op.
 */
void DyePalette::replaceSColorDefault(uint32_t *restrict pixels,
                                      const int bufSize) const restrict2
{
    const size_t sz = mColors.size();
    // A negative bufSize must not reach the pointer arithmetic below: the
    // end pointer would land outside the buffer and the loop would never
    // stop on it. The SSE2/AVX2 variants already do nothing for such sizes.
    if (sz == 0U || pixels == nullptr || bufSize <= 0)
        return;

    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
    // drop the trailing entry that has no replacement partner
    if ((sz % 2) != 0U)
        -- it_end;

    for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
         pixels != p_end;
         ++ pixels)
    {
        uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
        // keep only the three color bytes of the pixel for the comparison
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
        const unsigned int data = (*pixels) & 0x00ffffffU;
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

        const unsigned int data = (*pixels) & 0xffffff00U;
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
        while (it != it_end)
        {
            // col is the color to look for, col2 the color to write
            const DyeColor &col = *it;
            ++ it;
            const DyeColor &col2 = *it;

            // pack the source color in the same layout as `data`
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int coldata = (col.value[2] << 16U)
                | (col.value[1] << 8U) | (col.value[0]);
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            const unsigned int coldata = (col.value[2] << 8U)
                | (col.value[1] << 16U) | (col.value[0] << 24U);
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            if (data == coldata)
            {
                p[3] = col2.value[0];
                p[2] = col2.value[1];
                p[1] = col2.value[2];
                // first matching pair wins
                break;
            }

            ++ it;
        }
    }
}
90
91
#ifdef SIMD_SUPPORTED
92
/*
93
static void print256(const char *const text, const __m256i &val);
94
static void print256(const char *const text, const __m256i &val)
95
{
96
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
97
}
98
*/
99
100
__attribute__ ((target ("sse2")))
101
11
void DyePalette::replaceSColorSse2(uint32_t *restrict pixels,
102
                                   const int bufSize) const restrict2
103
{
104
22
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
105
22
    const size_t sz = mColors.size();
106
11
    if (sz == 0U || pixels == nullptr)
107
        return;
108
11
    if ((sz % 2) != 0U)
109
        -- it_end;
110
11
    const int mod = bufSize % 8;
111
11
    const int bufEnd = bufSize - mod;
112
113
17
    for (int ptr = 0; ptr < bufEnd; ptr += 4)
114
    {
115
6
        __m128i mask = _mm_set1_epi32(0xffffff00U);
116
//        __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(pixels));
117
        __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
118
12
            &pixels[ptr]));
119
120
12
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
121
18
        while (it != it_end)
122
        {
123
12
            const DyeColor &col = *it;
124
12
            ++ it;
125
12
            const DyeColor &col2 = *it;
126
127
12
            __m128i base2 = _mm_and_si128(mask, base);
128
24
            __m128i newMask = _mm_set1_epi32(col2.valueS);
129
24
            __m128i cmpMask = _mm_set1_epi32(col.valueS);
130
12
            __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
131
12
            cmpRes = _mm_and_si128(mask, cmpRes);
132
12
            __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
133
12
            __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
134
12
            base = _mm_or_si128(srcAnd, dstAnd);
135
            ++ it;
136
        }
137
//        _mm_store_si128(reinterpret_cast<__m128i*>(pixels), base);
138
12
        _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
139
    }
140
141
    // complete end without simd
142
65
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
143
    {
144
27
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
145
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
146
        const unsigned int data = pixels[ptr] & 0x00ffffffU;
147
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
148
149
27
        const unsigned int data = pixels[ptr] & 0xffffff00U;
150
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
151
152
54
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
153
56
        while (it != it_end)
154
        {
155
51
            const DyeColor &col = *it;
156
51
            ++ it;
157
51
            const DyeColor &col2 = *it;
158
159
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
160
            const unsigned int coldata = (col.value[2] << 16U)
161
                | (col.value[1] << 8U) | (col.value[0]);
162
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
163
164
51
            const unsigned int coldata = (col.value[2] << 8U)
165
51
                | (col.value[1] << 16U) | (col.value[0] << 24U);
166
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
167
168
51
            if (data == coldata)
169
            {
170
22
                p[3] = col2.value[0];
171
22
                p[2] = col2.value[1];
172
22
                p[1] = col2.value[2];
173
22
                break;
174
            }
175
176
            ++ it;
177
        }
178
    }
179
}
180
181
__attribute__ ((target ("avx2")))
182
25
void DyePalette::replaceSColorAvx2(uint32_t *restrict pixels,
183
                                   const int bufSize) const restrict2
184
{
185
50
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
186
50
    const size_t sz = mColors.size();
187
25
    if (sz == 0U || pixels == nullptr)
188
        return;
189
25
    if ((sz % 2) != 0U)
190
        -- it_end;
191
25
    const int mod = bufSize % 8;
192
25
    const int bufEnd = bufSize - mod;
193
194
415
    for (int ptr = 0; ptr < bufEnd; ptr += 8)
195
    {
196
390
        __m256i mask = _mm256_set1_epi32(0xffffff00U);
197
//        __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
198
        __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
199
780
            &pixels[ptr]));
200
201
780
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
202
2066
        while (it != it_end)
203
        {
204
1676
            const DyeColor &col = *it;
205
1676
            ++ it;
206
1676
            const DyeColor &col2 = *it;
207
208
1676
            __m256i base2 = _mm256_and_si256(mask, base);
209
3352
            __m256i newMask = _mm256_set1_epi32(col2.valueS);
210
3352
            __m256i cmpMask = _mm256_set1_epi32(col.valueS);
211
1676
            __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
212
1676
            cmpRes = _mm256_and_si256(mask, cmpRes);
213
1676
            __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
214
1676
            __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
215
1676
            base = _mm256_or_si256(srcAnd, dstAnd);
216
            ++ it;
217
        }
218
//        _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
219
780
        _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
220
    }
221
222
    // complete end without simd
223
133
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
224
    {
225
54
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
226
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
227
        const unsigned int data = pixels[ptr] & 0x00ffffffU;
228
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
229
230
54
        const unsigned int data = pixels[ptr] & 0xffffff00U;
231
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
232
233
108
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
234
112
        while (it != it_end)
235
        {
236
102
            const DyeColor &col = *it;
237
102
            ++ it;
238
102
            const DyeColor &col2 = *it;
239
240
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
241
            const unsigned int coldata = (col.value[2] << 16U)
242
                | (col.value[1] << 8U) | (col.value[0]);
243
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
244
245
102
            const unsigned int coldata = (col.value[2] << 8U)
246
102
                | (col.value[1] << 16U) | (col.value[0] << 24U);
247
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
248
249
102
            if (data == coldata)
250
            {
251
44
                p[3] = col2.value[0];
252
44
                p[2] = col2.value[1];
253
44
                p[1] = col2.value[2];
254
44
                break;
255
            }
256
257
            ++ it;
258
        }
259
    }
260
}
261
262
#endif  // SIMD_SUPPORTED