GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replacesoglcolor.cpp Lines: 86 100 86.0 %
Date: 2019-08-19 Branches: 27 38 71.1 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2019  The ManaPlus Developers
6
 *
7
 *  This file is part of The ManaPlus Client.
8
 *
9
 *  This program is free software; you can redistribute it and/or modify
10
 *  it under the terms of the GNU General Public License as published by
11
 *  the Free Software Foundation; either version 2 of the License, or
12
 *  any later version.
13
 *
14
 *  This program is distributed in the hope that it will be useful,
15
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 *  GNU General Public License for more details.
18
 *
19
 *  You should have received a copy of the GNU General Public License
20
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#ifdef USE_OPENGL
24
25
#include "resources/dye/dyepalette.h"
26
27
PRAGMA48(GCC diagnostic push)
28
PRAGMA48(GCC diagnostic ignored "-Wshadow")
29
#ifndef SDL_BIG_ENDIAN
30
#include <SDL_endian.h>
31
#endif  // SDL_BIG_ENDIAN
32
PRAGMA48(GCC diagnostic pop)
33
34
#ifdef SIMD_SUPPORTED
35
// avx2
36
#include <immintrin.h>
37
#endif  // SIMD_SUPPORTED
38
39
#include "debug.h"
40
41
6
/**
 * Scalar implementation of the palette color replacement.
 *
 * mColors is read as consecutive (source, replacement) pairs; when it holds
 * an odd number of entries the trailing unpaired entry is ignored.  For each
 * pixel the three color bytes are compared with every source color in order
 * and, on the first match, overwritten with the paired replacement color.
 * The fourth byte of the pixel is never modified.
 *
 * @param pixels   buffer of 32 bit pixels, modified in place; may be nullptr,
 *                 in which case nothing is done.
 * @param bufSize  number of pixels in the buffer; values <= 0 do nothing.
 */
void DyePalette::replaceSOGLColorDefault(uint32_t *restrict pixels,
                                         const int bufSize) const restrict2
{
    const size_t sz = mColors.size();
    // A negative size must be rejected before it is converted by CAST_SIZE:
    // the end pointer computed from it would not be reachable by stepping
    // forward inside the buffer.  The SIMD variants in this file already
    // do nothing for such sizes.
    if ((sz == 0U) || (pixels == nullptr) || (bufSize <= 0))
        return;

    // Only complete (source, replacement) pairs are used.
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
    if ((sz % 2) != 0U)
        -- it_end;

    for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
         pixels != p_end;
         ++pixels)
    {
        uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
        // Keep only the three color bytes (p[0]..p[2] in memory order).
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
        const unsigned int data = (*pixels) & 0xffffff00;
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

        const unsigned int data = (*pixels) & 0x00ffffff;
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
        while (it != it_end)
        {
            const DyeColor &col = *it;
            ++ it;
            const DyeColor &col2 = *it;
            ++ it;

            // Pack the source color the same way the pixel was masked.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int coldata = (col.value[0] << 24)
                | (col.value[1] << 16) | (col.value[2] << 8);
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            const unsigned int coldata = (col.value[0])
                | (col.value[1] << 8) | (col.value[2] << 16);
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            if (data == coldata)
            {
                // First matching pair wins; later pairs are not applied.
                p[0] = col2.value[0];
                p[1] = col2.value[1];
                p[2] = col2.value[2];
                break;
            }
        }
    }
}
91
92
#ifdef SIMD_SUPPORTED
93
/*
94
static void print256(const char *const text, const __m256i &val);
95
static void print256(const char *const text, const __m256i &val)
96
{
97
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
98
}
99
*/
100
101
__attribute__ ((target ("sse2")))
102
1
void DyePalette::replaceSOGLColorSse2(uint32_t *restrict pixels,
103
                                      const int bufSize) const restrict2
104
{
105
2
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
106
2
    const size_t sz = mColors.size();
107
1
    if ((sz == 0U) || (pixels == nullptr))
108
        return;
109
1
    if ((sz % 2) != 0U)
110
        -- it_end;
111
112
1
    const int mod = bufSize % 4;
113
1
    const int bufEnd = bufSize - mod;
114
115
3
    for (int ptr = 0; ptr < bufEnd; ptr += 4)
116
    {
117
2
        __m128i mask = _mm_set1_epi32(0x00ffffff);
118
//        __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(
119
//         &pixels[ptr]));
120
        __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
121
4
            &pixels[ptr]));
122
123
4
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
124
6
        while (it != it_end)
125
        {
126
4
            const DyeColor &col = *it;
127
4
            ++ it;
128
4
            const DyeColor &col2 = *it;
129
130
4
            __m128i base2 = _mm_and_si128(mask, base);
131
8
            __m128i newMask = _mm_set1_epi32(col2.valueSOgl);
132
8
            __m128i cmpMask = _mm_set1_epi32(col.valueSOgl);
133
4
            __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
134
4
            cmpRes = _mm_and_si128(mask, cmpRes);
135
4
            __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
136
4
            __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
137
4
            base = _mm_or_si128(srcAnd, dstAnd);
138
            ++ it;
139
        }
140
//        _mm_store_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
141
4
        _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
142
    }
143
144
1
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
145
    {
146
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
147
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
148
        const unsigned int data = pixels[ptr] & 0xffffff00;
149
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
150
151
        const unsigned int data = pixels[ptr] & 0x00ffffff;
152
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
153
154
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
155
        while (it != it_end)
156
        {
157
            const DyeColor &col = *it;
158
            ++ it;
159
            const DyeColor &col2 = *it;
160
161
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
162
            const unsigned int coldata = (col.value[0] << 24)
163
                | (col.value[1] << 16) | (col.value[2] << 8);
164
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
165
166
            const unsigned int coldata = (col.value[0])
167
                | (col.value[1] << 8) | (col.value[2] << 16);
168
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
169
170
            if (data == coldata)
171
            {
172
                p[0] = col2.value[0];
173
                p[1] = col2.value[1];
174
                p[2] = col2.value[2];
175
                break;
176
            }
177
178
            ++ it;
179
        }
180
    }
181
}
182
183
__attribute__ ((target ("avx2")))
184
7
void DyePalette::replaceSOGLColorAvx2(uint32_t *restrict pixels,
185
                                      const int bufSize) const restrict2
186
{
187
14
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
188
14
    const size_t sz = mColors.size();
189
7
    if ((sz == 0U) || (pixels == nullptr))
190
        return;
191
7
    if ((sz % 2) != 0U)
192
        -- it_end;
193
194
7
    const int mod = bufSize % 8;
195
7
    const int bufEnd = bufSize - mod;
196
197
9
    for (int ptr = 0; ptr < bufEnd; ptr += 8)
198
    {
199
2
        __m256i mask = _mm256_set1_epi32(0x00ffffff);
200
//          __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(
201
//              &pixels[ptr]));
202
        __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
203
4
            &pixels[ptr]));
204
205
4
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
206
6
        while (it != it_end)
207
        {
208
4
            const DyeColor &col = *it;
209
4
            ++ it;
210
4
            const DyeColor &col2 = *it;
211
212
4
            __m256i base2 = _mm256_and_si256(mask, base);
213
8
            __m256i newMask = _mm256_set1_epi32(col2.valueSOgl);
214
8
            __m256i cmpMask = _mm256_set1_epi32(col.valueSOgl);
215
4
            __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
216
4
            cmpRes = _mm256_and_si256(mask, cmpRes);
217
4
            __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
218
4
            __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
219
4
            base = _mm256_or_si256(srcAnd, dstAnd);
220
            ++ it;
221
        }
222
//            _mm256_store_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
223
//                base);
224
2
        _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
225
2
            base);
226
    }
227
228
25
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
229
    {
230
9
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
231
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
232
        const unsigned int data = pixels[ptr] & 0xffffff00;
233
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
234
235
9
        const unsigned int data = pixels[ptr] & 0x00ffffff;
236
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
237
238
18
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
239
12
        while (it != it_end)
240
        {
241
10
            const DyeColor &col = *it;
242
10
            ++ it;
243
10
            const DyeColor &col2 = *it;
244
245
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
246
            const unsigned int coldata = (col.value[0] << 24)
247
                | (col.value[1] << 16) | (col.value[2] << 8);
248
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
249
250
10
            const unsigned int coldata = (col.value[0])
251
10
                | (col.value[1] << 8) | (col.value[2] << 16);
252
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
253
254
10
            if (data == coldata)
255
            {
256
7
                p[0] = col2.value[0];
257
7
                p[1] = col2.value[1];
258
7
                p[2] = col2.value[2];
259
7
                break;
260
            }
261
262
            ++ it;
263
        }
264
    }
265
}
266
267
#endif  // SIMD_SUPPORTED
268
#endif  // USE_OPENGL