GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replaceaoglcolor.cpp Lines: 84 103 81.6 %
Date: 2018-09-09 Branches: 29 42 69.0 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2018  The ManaPlus Developers
6
 *
7
 *  This file is part of The ManaPlus Client.
8
 *
9
 *  This program is free software; you can redistribute it and/or modify
10
 *  it under the terms of the GNU General Public License as published by
11
 *  the Free Software Foundation; either version 2 of the License, or
12
 *  any later version.
13
 *
14
 *  This program is distributed in the hope that it will be useful,
15
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 *  GNU General Public License for more details.
18
 *
19
 *  You should have received a copy of the GNU General Public License
20
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#ifdef USE_OPENGL
24
25
#include "resources/dye/dyepalette.h"
26
27
PRAGMA48(GCC diagnostic push)
28
PRAGMA48(GCC diagnostic ignored "-Wshadow")
29
#ifndef SDL_BIG_ENDIAN
30
#include <SDL_endian.h>
31
#endif  // SDL_BIG_ENDIAN
32
PRAGMA48(GCC diagnostic pop)
33
34
#ifdef SIMD_SUPPORTED
35
// avx2
36
#include <immintrin.h>
37
#endif  // SIMD_SUPPORTED
38
39
#include "debug.h"
40
41
6
/**
 * Recolors a buffer of 32-bit pixels in place using the palette stored in
 * mColors (plain scalar implementation, no SIMD).
 *
 * mColors is read as consecutive (match, replacement) pairs. A pixel whose
 * four bytes equal the "match" entry is overwritten with the four bytes of
 * the "replacement" entry; only the first matching pair is applied to a
 * pixel. A trailing unpaired entry is ignored.
 *
 * @param pixels  buffer to modify; a null pointer makes the call a no-op.
 * @param bufSize number of 32-bit pixels in the buffer.
 */
void DyePalette::replaceAOGLColorDefault(uint32_t *restrict pixels,
                                         const int bufSize) const restrict2
{
    const size_t colorCount = mColors.size();
    if (pixels == nullptr || colorCount == 0u)
        return;

    // Only complete pairs are usable, so drop an odd trailing entry.
    STD_VECTOR<DyeColor>::const_iterator pairsEnd = mColors.end();
    if ((colorCount % 2) != 0u)
        -- pairsEnd;

    const size_t pixelCount = CAST_SIZE(bufSize);
    for (size_t idx = 0; idx != pixelCount; ++ idx)
    {
        uint8_t *const bytes = reinterpret_cast<uint8_t *>(pixels + idx);
        const unsigned int current = pixels[idx];

        STD_VECTOR<DyeColor>::const_iterator cur = mColors.begin();
        while (cur != pairsEnd)
        {
            const DyeColor &from = *cur;
            ++ cur;
            const DyeColor &to = *cur;
            ++ cur;

            // Pack the "match" color so that it compares equal to a pixel
            // holding the same four bytes in memory order.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int key = (from.value[0] << 24U)
                | (from.value[1] << 16U)
                | (from.value[2] << 8U)
                | from.value[3];
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            const unsigned int key = (from.value[0])
                | (from.value[1] << 8U)
                | (from.value[2] << 16U)
                | (from.value[3] << 24U);
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            if (current != key)
                continue;

            // First match wins: write the replacement and stop searching.
            for (unsigned int c = 0U; c < 4U; ++ c)
                bytes[c] = to.value[c];
            break;
        }
    }
}
91
92
#ifdef SIMD_SUPPORTED
93
/*
94
static void print256(const char *const text, const __m256i &val);
95
static void print256(const char *const text, const __m256i &val)
96
{
97
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
98
}
99
*/
100
101
__attribute__ ((target ("sse2")))
102
1
void DyePalette::replaceAOGLColorSse2(uint32_t *restrict pixels,
103
                                      const int bufSize) const restrict2
104
{
105
2
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
106
2
    const size_t sz = mColors.size();
107
1
    if (sz == 0u || pixels == nullptr)
108
        return;
109
1
    if ((sz % 2) != 0u)
110
        -- it_end;
111
112
1
    if (bufSize >= 8)
113
    {
114
5
        for (int ptr = 0; ptr < bufSize; ptr += 4)
115
        {
116
//            __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(
117
//                &pixels[ptr]));
118
            __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
119
4
                &pixels[ptr]));
120
121
4
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
122
6
            while (it != it_end)
123
            {
124
4
                const DyeColor &col = *it;
125
4
                ++ it;
126
4
                const DyeColor &col2 = *it;
127
128
8
                __m128i newMask = _mm_set1_epi32(col2.valueAOgl);
129
8
                __m128i cmpMask = _mm_set1_epi32(col.valueAOgl);
130
4
                __m128i cmpRes = _mm_cmpeq_epi32(base, cmpMask);
131
4
                __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
132
4
                __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
133
4
                base = _mm_or_si128(srcAnd, dstAnd);
134
135
                ++ it;
136
            }
137
//            _mm_store_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
138
4
            _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
139
        }
140
    }
141
    else
142
    {
143
        for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
144
             pixels != p_end;
145
             ++pixels)
146
        {
147
            uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
148
            const unsigned int data = *pixels;
149
150
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
151
            while (it != it_end)
152
            {
153
                const DyeColor &col = *it;
154
                ++ it;
155
                const DyeColor &col2 = *it;
156
157
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
158
                const unsigned int coldata = (col.value[0] << 24U)
159
                    | (col.value[1] << 16U)
160
                    | (col.value[2] << 8U)
161
                    | col.value[3];
162
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
163
164
                const unsigned int coldata = (col.value[0])
165
                    | (col.value[1] << 8U)
166
                    | (col.value[2] << 16U)
167
                    | (col.value[3] << 24U);
168
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
169
170
                if (data == coldata)
171
                {
172
                    p[0] = col2.value[0];
173
                    p[1] = col2.value[1];
174
                    p[2] = col2.value[2];
175
                    p[3] = col2.value[3];
176
                    break;
177
                }
178
179
                ++ it;
180
            }
181
        }
182
    }
183
}
184
185
__attribute__ ((target ("avx2")))
186
7
void DyePalette::replaceAOGLColorAvx2(uint32_t *restrict pixels,
187
                                      const int bufSize) const restrict2
188
{
189
14
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
190
14
    const size_t sz = mColors.size();
191
7
    if (sz == 0u || pixels == nullptr)
192
        return;
193
7
    if ((sz % 2) != 0u)
194
        -- it_end;
195
196
7
    if (bufSize >= 8)
197
    {
198
6
        for (int ptr = 0; ptr < bufSize; ptr += 8)
199
        {
200
//            __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(
201
//                &pixels[ptr]));
202
            __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
203
4
                &pixels[ptr]));
204
205
4
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
206
6
            while (it != it_end)
207
            {
208
4
                const DyeColor &col = *it;
209
4
                ++ it;
210
4
                const DyeColor &col2 = *it;
211
212
8
                __m256i newMask = _mm256_set1_epi32(col2.valueAOgl);
213
8
                __m256i cmpMask = _mm256_set1_epi32(col.valueAOgl);
214
4
                __m256i cmpRes = _mm256_cmpeq_epi32(base, cmpMask);
215
4
                __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
216
4
                __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
217
4
                base = _mm256_or_si256(srcAnd, dstAnd);
218
219
                ++ it;
220
            }
221
//            _mm256_store_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
222
//                base);
223
2
            _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]),
224
2
                base);
225
        }
226
    }
227
    else
228
    {
229
14
        for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
230
14
             pixels != p_end;
231
             ++pixels)
232
        {
233
9
            uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
234
9
            const unsigned int data = *pixels;
235
236
18
            STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
237
18
            while (it != it_end)
238
            {
239
16
                const DyeColor &col = *it;
240
16
                ++ it;
241
16
                const DyeColor &col2 = *it;
242
243
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
244
                const unsigned int coldata = (col.value[0] << 24U)
245
                    | (col.value[1] << 16U)
246
                    | (col.value[2] << 8U)
247
                    | col.value[3];
248
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
249
250
16
                const unsigned int coldata = (col.value[0])
251
16
                    | (col.value[1] << 8U)
252
16
                    | (col.value[2] << 16U)
253
16
                    | (col.value[3] << 24U);
254
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
255
256
16
                if (data == coldata)
257
                {
258
7
                    p[0] = col2.value[0];
259
7
                    p[1] = col2.value[1];
260
7
                    p[2] = col2.value[2];
261
7
                    p[3] = col2.value[3];
262
7
                    break;
263
                }
264
265
                ++ it;
266
            }
267
        }
268
    }
269
}
270
271
#endif   // SIMD_SUPPORTED
272
#endif  // USE_OPENGL