GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replacesoglcolor.cpp Lines: 83 99 83.8 %
Date: 2018-05-24 20:11:55 Branches: 29 42 69.0 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2018  The ManaPlus Developers
6
 *
7
 *  This file is part of The ManaPlus Client.
8
 *
9
 *  This program is free software; you can redistribute it and/or modify
10
 *  it under the terms of the GNU General Public License as published by
11
 *  the Free Software Foundation; either version 2 of the License, or
12
 *  any later version.
13
 *
14
 *  This program is distributed in the hope that it will be useful,
15
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 *  GNU General Public License for more details.
18
 *
19
 *  You should have received a copy of the GNU General Public License
20
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#ifdef USE_OPENGL
24
25
#include "resources/dye/dyepalette.h"
26
27
PRAGMA48(GCC diagnostic push)
28
PRAGMA48(GCC diagnostic ignored "-Wshadow")
29
#ifndef SDL_BIG_ENDIAN
30
#include <SDL_endian.h>
31
#endif  // SDL_BYTEORDER
32
PRAGMA48(GCC diagnostic pop)
33
34
#ifdef SIMD_SUPPORTED
35
// avx2
36
#include <immintrin.h>
37
#endif  // SIMD_SUPPORTED
38
39
#include "debug.h"
40
41
12
/**
 * Scalar color replacement over a buffer of 32-bit pixels.
 *
 * mColors is walked as consecutive (source, replacement) pairs; if it holds
 * an odd number of entries the trailing one is ignored. For every pixel the
 * three color bytes are compared with each source color in order, and on the
 * first match they are overwritten with the paired replacement. The fourth
 * byte of the pixel is never written.
 *
 * @param pixels   pixel buffer, modified in place; no-op when null.
 * @param bufSize  number of 32-bit pixels to process.
 */
void DyePalette::replaceSOGLColorDefault(uint32_t *restrict pixels,
                                         const int bufSize) const restrict2
{
    const size_t colorCount = mColors.size();
    if ((pixels == nullptr) || (colorCount == 0u))
        return;

    // Stop before a dangling last entry that has no replacement partner.
    STD_VECTOR<DyeColor>::const_iterator pairsEnd = mColors.end();
    if ((colorCount % 2) != 0u)
        -- pairsEnd;

    const uint32_t *const lastPixel = pixels + CAST_SIZE(bufSize);
    while (pixels != lastPixel)
    {
        uint8_t *const bytes = reinterpret_cast<uint8_t *>(pixels);

        // Keep only the three color bytes of the pixel for comparison.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
        const unsigned int rgb = (*pixels) & 0xffffff00;
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
        const unsigned int rgb = (*pixels) & 0x00ffffff;
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

        for (STD_VECTOR<DyeColor>::const_iterator pairIt = mColors.begin();
             pairIt != pairsEnd;
             ++ pairIt)
        {
            const DyeColor &from = *pairIt;
            // Step onto the replacement half of the pair.
            ++ pairIt;
            const DyeColor &to = *pairIt;

            // Pack the source color in the same byte layout as the pixel.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int wanted = (from.value[0] << 24)
                | (from.value[1] << 16) | (from.value[2] << 8);
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int wanted = (from.value[0])
                | (from.value[1] << 8) | (from.value[2] << 16);
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            if (rgb == wanted)
            {
                // First match wins; later pairs are not tried.
                bytes[0] = to.value[0];
                bytes[1] = to.value[1];
                bytes[2] = to.value[2];
                break;
            }
        }

        ++ pixels;
    }
}
91
92
#ifdef SIMD_SUPPORTED
93
/*
94
static void print256(const char *const text, const __m256i &val);
95
static void print256(const char *const text, const __m256i &val)
96
{
97
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
98
}
99
*/
100
101
__attribute__ ((target ("sse2")))
102
2
void DyePalette::replaceSOGLColorSse2(uint32_t *restrict pixels,
103
                                      const int bufSize) const restrict2
104
{
105
4
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
106
4
    const size_t sz = mColors.size();
107
2
    if ((sz == 0u) || (pixels == nullptr))
108
        return;
109
2
    if ((sz % 2) != 0u)
110
        -- it_end;
111
112
2
    if (bufSize >= 8)
113
    {
114
10
        for (int ptr = 0; ptr < bufSize; ptr += 4)
115
        {
116
4
            __m128i mask = _mm_set1_epi32(0x00ffffff);
117
//            __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(
118
//             &pixels[ptr]));
119
            __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
120
8
                &pixels[ptr]));
121
122
8
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
123
12
            while (it != it_end)
124
            {
125
8
                const DyeColor &col = *it;
126
8
                ++ it;
127
8
                const DyeColor &col2 = *it;
128
129
8
                __m128i base2 = _mm_and_si128(mask, base);
130
16
                __m128i newMask = _mm_set1_epi32(col2.valueSOgl);
131
16
                __m128i cmpMask = _mm_set1_epi32(col.valueSOgl);
132
8
                __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
133
8
                cmpRes = _mm_and_si128(mask, cmpRes);
134
8
                __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
135
8
                __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
136
8
                base = _mm_or_si128(srcAnd, dstAnd);
137
                ++ it;
138
            }
139
//            _mm_store_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
140
8
            _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
141
        }
142
    }
143
    else
144
    {
145
        for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
146
             pixels != p_end;
147
             ++pixels)
148
        {
149
            uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
150
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
151
            const unsigned int data = (*pixels) & 0xffffff00;
152
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
153
154
            const unsigned int data = (*pixels) & 0x00ffffff;
155
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
156
157
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
158
            while (it != it_end)
159
            {
160
                const DyeColor &col = *it;
161
                ++ it;
162
                const DyeColor &col2 = *it;
163
164
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
165
                const unsigned int coldata = (col.value[0] << 24)
166
                    | (col.value[1] << 16) | (col.value[2] << 8);
167
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
168
169
                const unsigned int coldata = (col.value[0])
170
                    | (col.value[1] << 8) | (col.value[2] << 16);
171
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
172
173
                if (data == coldata)
174
                {
175
                    p[0] = col2.value[0];
176
                    p[1] = col2.value[1];
177
                    p[2] = col2.value[2];
178
                    break;
179
                }
180
181
                ++ it;
182
            }
183
        }
184
    }
185
}
186
187
__attribute__ ((target ("avx2")))
188
14
void DyePalette::replaceSOGLColorAvx2(uint32_t *restrict pixels,
189
                                      const int bufSize) const restrict2
190
{
191
28
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
192
28
    const size_t sz = mColors.size();
193
14
    if ((sz == 0u) || (pixels == nullptr))
194
        return;
195
14
    if ((sz % 2) != 0u)
196
        -- it_end;
197
198
14
    if (bufSize >= 8)
199
    {
200
12
        for (int ptr = 0; ptr < bufSize; ptr += 8)
201
        {
202
4
            __m256i mask = _mm256_set1_epi32(0x00ffffff);
203
//          __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(
204
//              &pixels[ptr]));
205
            __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
206
8
                &pixels[ptr]));
207
208
8
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
209
12
            while (it != it_end)
210
            {
211
8
                const DyeColor &col = *it;
212
8
                ++ it;
213
8
                const DyeColor &col2 = *it;
214
215
8
                __m256i base2 = _mm256_and_si256(mask, base);
216
16
                __m256i newMask = _mm256_set1_epi32(col2.valueSOgl);
217
16
                __m256i cmpMask = _mm256_set1_epi32(col.valueSOgl);
218
8
                __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
219
8
                cmpRes = _mm256_and_si256(mask, cmpRes);
220
8
                __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
221
8
                __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
222
8
                base = _mm256_or_si256(srcAnd, dstAnd);
223
                ++ it;
224
            }
225
//            _mm256_store_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
226
//                base);
227
8
            _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
228
                base);
229
        }
230
    }
231
    else
232
    {
233
28
        for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
234
28
             pixels != p_end;
235
             ++pixels)
236
        {
237
18
            uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
238
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
239
            const unsigned int data = (*pixels) & 0xffffff00;
240
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
241
242
18
            const unsigned int data = (*pixels) & 0x00ffffff;
243
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
244
245
36
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
246
24
            while (it != it_end)
247
            {
248
20
                const DyeColor &col = *it;
249
20
                ++ it;
250
20
                const DyeColor &col2 = *it;
251
252
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
253
                const unsigned int coldata = (col.value[0] << 24)
254
                    | (col.value[1] << 16) | (col.value[2] << 8);
255
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
256
257
40
                const unsigned int coldata = (col.value[0])
258
20
                    | (col.value[1] << 8) | (col.value[2] << 16);
259
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
260
261
20
                if (data == coldata)
262
                {
263
14
                    p[0] = col2.value[0];
264
14
                    p[1] = col2.value[1];
265
14
                    p[2] = col2.value[2];
266
14
                    break;
267
                }
268
269
                ++ it;
270
            }
271
        }
272
    }
273
}
274
275
#endif  // SIMD_SUPPORTED
276
#endif  // USE_OPENGL