GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replacesoglcolor.cpp Lines: 86 100 86.0 %
Date: 2021-03-17 Branches: 27 38 71.1 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2019  The ManaPlus Developers
6
 *  Copyright (C) 2019-2021  Andrei Karas
7
 *
8
 *  This file is part of The ManaPlus Client.
9
 *
10
 *  This program is free software; you can redistribute it and/or modify
11
 *  it under the terms of the GNU General Public License as published by
12
 *  the Free Software Foundation; either version 2 of the License, or
13
 *  any later version.
14
 *
15
 *  This program is distributed in the hope that it will be useful,
16
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 *  GNU General Public License for more details.
19
 *
20
 *  You should have received a copy of the GNU General Public License
21
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 */
23
24
#ifdef USE_OPENGL
25
26
#include "resources/dye/dyepalette.h"
27
28
PRAGMA48(GCC diagnostic push)
29
PRAGMA48(GCC diagnostic ignored "-Wshadow")
30
#ifndef SDL_BIG_ENDIAN
31
#include <SDL_endian.h>
32
#endif  // SDL_BIG_ENDIAN
33
PRAGMA48(GCC diagnostic pop)
34
35
#ifdef SIMD_SUPPORTED
36
// avx2
37
#include <immintrin.h>
38
#endif  // SIMD_SUPPORTED
39
40
#include "debug.h"
41
42
6
/**
 * Scalar color replacement over a buffer of 32-bit pixels.
 *
 * mColors is read as consecutive (source, replacement) pairs; an unpaired
 * trailing entry is ignored. For every pixel the first three bytes in memory
 * are compared against value[0..2] of each source color in palette order.
 * On the first match those three bytes are overwritten with value[0..2] of
 * the paired replacement color and the search for that pixel stops. The
 * fourth byte of the pixel is never written.
 *
 * @param pixels  buffer to modify in place; nothing happens if it is null
 * @param bufSize number of 32-bit pixels in the buffer; values <= 0 are a
 *                no-op
 */
void DyePalette::replaceSOGLColorDefault(uint32_t *restrict pixels,
                                         const int bufSize) const restrict2
{
    const size_t sz = mColors.size();
    // A negative size would wrap inside CAST_SIZE and send the loop far
    // outside the buffer, so reject it together with the other empty cases.
    if ((sz == 0U) || (pixels == nullptr) || (bufSize <= 0))
        return;

    // Stop before an unpaired last color so that the double increment in
    // the inner loop always lands exactly on it_end.
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
    if ((sz % 2) != 0U)
        -- it_end;

    for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
         pixels != p_end;
         ++pixels)
    {
        uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
        // Keep only the three bytes that are compared; the mask depends on
        // byte order so that it always covers p[0]..p[2].
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
        const unsigned int data = (*pixels) & 0xffffff00;
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

        const unsigned int data = (*pixels) & 0x00ffffff;
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
        while (it != it_end)
        {
            const DyeColor &col = *it;
            ++ it;
            const DyeColor &col2 = *it;
            ++ it;

            // Pack the source color the same way the pixel was masked.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int coldata = (col.value[0] << 24)
                | (col.value[1] << 16) | (col.value[2] << 8);
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            const unsigned int coldata = (col.value[0])
                | (col.value[1] << 8) | (col.value[2] << 16);
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            if (data == coldata)
            {
                p[0] = col2.value[0];
                p[1] = col2.value[1];
                p[2] = col2.value[2];
                // First match wins; later pairs must not see this pixel.
                break;
            }
        }
    }
}
92
93
#ifdef SIMD_SUPPORTED
94
/*
95
static void print256(const char *const text, const __m256i &val);
96
static void print256(const char *const text, const __m256i &val)
97
{
98
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
99
}
100
*/
101
102
__attribute__ ((target ("sse2")))
103
1
void DyePalette::replaceSOGLColorSse2(uint32_t *restrict pixels,
104
                                      const int bufSize) const restrict2
105
{
106
2
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
107
2
    const size_t sz = mColors.size();
108
1
    if ((sz == 0U) || (pixels == nullptr))
109
        return;
110
1
    if ((sz % 2) != 0U)
111
        -- it_end;
112
113
1
    const int mod = bufSize % 4;
114
1
    const int bufEnd = bufSize - mod;
115
116
3
    for (int ptr = 0; ptr < bufEnd; ptr += 4)
117
    {
118
2
        __m128i mask = _mm_set1_epi32(0x00ffffff);
119
//        __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(
120
//         &pixels[ptr]));
121
        __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
122
4
            &pixels[ptr]));
123
124
4
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
125
6
        while (it != it_end)
126
        {
127
4
            const DyeColor &col = *it;
128
4
            ++ it;
129
4
            const DyeColor &col2 = *it;
130
131
4
            __m128i base2 = _mm_and_si128(mask, base);
132
8
            __m128i newMask = _mm_set1_epi32(col2.valueSOgl);
133
8
            __m128i cmpMask = _mm_set1_epi32(col.valueSOgl);
134
4
            __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
135
4
            cmpRes = _mm_and_si128(mask, cmpRes);
136
4
            __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
137
4
            __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
138
4
            base = _mm_or_si128(srcAnd, dstAnd);
139
            ++ it;
140
        }
141
//        _mm_store_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
142
4
        _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
143
    }
144
145
1
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
146
    {
147
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
148
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
149
        const unsigned int data = pixels[ptr] & 0xffffff00;
150
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
151
152
        const unsigned int data = pixels[ptr] & 0x00ffffff;
153
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
154
155
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
156
        while (it != it_end)
157
        {
158
            const DyeColor &col = *it;
159
            ++ it;
160
            const DyeColor &col2 = *it;
161
162
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
163
            const unsigned int coldata = (col.value[0] << 24)
164
                | (col.value[1] << 16) | (col.value[2] << 8);
165
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
166
167
            const unsigned int coldata = (col.value[0])
168
                | (col.value[1] << 8) | (col.value[2] << 16);
169
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
170
171
            if (data == coldata)
172
            {
173
                p[0] = col2.value[0];
174
                p[1] = col2.value[1];
175
                p[2] = col2.value[2];
176
                break;
177
            }
178
179
            ++ it;
180
        }
181
    }
182
}
183
184
__attribute__ ((target ("avx2")))
185
7
void DyePalette::replaceSOGLColorAvx2(uint32_t *restrict pixels,
186
                                      const int bufSize) const restrict2
187
{
188
14
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
189
14
    const size_t sz = mColors.size();
190
7
    if ((sz == 0U) || (pixels == nullptr))
191
        return;
192
7
    if ((sz % 2) != 0U)
193
        -- it_end;
194
195
7
    const int mod = bufSize % 8;
196
7
    const int bufEnd = bufSize - mod;
197
198
9
    for (int ptr = 0; ptr < bufEnd; ptr += 8)
199
    {
200
2
        __m256i mask = _mm256_set1_epi32(0x00ffffff);
201
//          __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(
202
//              &pixels[ptr]));
203
        __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
204
4
            &pixels[ptr]));
205
206
4
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
207
6
        while (it != it_end)
208
        {
209
4
            const DyeColor &col = *it;
210
4
            ++ it;
211
4
            const DyeColor &col2 = *it;
212
213
4
            __m256i base2 = _mm256_and_si256(mask, base);
214
8
            __m256i newMask = _mm256_set1_epi32(col2.valueSOgl);
215
8
            __m256i cmpMask = _mm256_set1_epi32(col.valueSOgl);
216
4
            __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
217
4
            cmpRes = _mm256_and_si256(mask, cmpRes);
218
4
            __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
219
4
            __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
220
4
            base = _mm256_or_si256(srcAnd, dstAnd);
221
            ++ it;
222
        }
223
//            _mm256_store_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
224
//                base);
225
2
        _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
226
2
            base);
227
    }
228
229
25
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
230
    {
231
9
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
232
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
233
        const unsigned int data = pixels[ptr] & 0xffffff00;
234
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
235
236
9
        const unsigned int data = pixels[ptr] & 0x00ffffff;
237
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
238
239
18
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
240
12
        while (it != it_end)
241
        {
242
10
            const DyeColor &col = *it;
243
10
            ++ it;
244
10
            const DyeColor &col2 = *it;
245
246
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
247
            const unsigned int coldata = (col.value[0] << 24)
248
                | (col.value[1] << 16) | (col.value[2] << 8);
249
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
250
251
10
            const unsigned int coldata = (col.value[0])
252
10
                | (col.value[1] << 8) | (col.value[2] << 16);
253
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
254
255
10
            if (data == coldata)
256
            {
257
7
                p[0] = col2.value[0];
258
7
                p[1] = col2.value[1];
259
7
                p[2] = col2.value[2];
260
7
                break;
261
            }
262
263
            ++ it;
264
        }
265
    }
266
}
267
268
#endif  // SIMD_SUPPORTED
269
#endif  // USE_OPENGL