GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replaceacolor.cpp Lines: 102 102 100.0 %
Date: 2021-03-17 Branches: 32 38 84.2 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2019  The ManaPlus Developers
6
 *  Copyright (C) 2019-2021  Andrei Karas
7
 *
8
 *  This file is part of The ManaPlus Client.
9
 *
10
 *  This program is free software; you can redistribute it and/or modify
11
 *  it under the terms of the GNU General Public License as published by
12
 *  the Free Software Foundation; either version 2 of the License, or
13
 *  any later version.
14
 *
15
 *  This program is distributed in the hope that it will be useful,
16
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 *  GNU General Public License for more details.
19
 *
20
 *  You should have received a copy of the GNU General Public License
21
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 */
23
24
#include "resources/dye/dyepalette.h"
25
26
PRAGMA48(GCC diagnostic push)
27
PRAGMA48(GCC diagnostic ignored "-Wshadow")
28
#ifndef SDL_BIG_ENDIAN
29
#include <SDL_endian.h>
30
#endif  // SDL_BIG_ENDIAN
31
PRAGMA48(GCC diagnostic pop)
32
33
#ifdef SIMD_SUPPORTED
34
// avx2
35
#include <immintrin.h>
36
#endif  // SIMD_SUPPORTED
37
38
#include "debug.h"
39
40
11
/**
 * Scalar color replacement over a buffer of 32-bit pixels.
 *
 * mColors is read as consecutive (source, replacement) pairs. For every
 * pixel the pairs are scanned in order; the first pair whose source color
 * equals the pixel overwrites the pixel with its replacement color and the
 * scan for that pixel stops. Pixels matching no pair are left untouched.
 *
 * @param pixels   buffer to modify in place; nothing is done if null.
 * @param bufSize  number of 32-bit pixels in the buffer.
 */
void DyePalette::replaceAColorDefault(uint32_t *restrict pixels,
                                      const int bufSize) const restrict2
{
    const size_t colorCount = mColors.size();
    if ((pixels == nullptr) || (colorCount == 0U))
        return;

    // With an odd number of colors the last one has no partner,
    // so it is excluded from the pair scan.
    STD_VECTOR<DyeColor>::const_iterator pairsEnd = mColors.end();
    if ((colorCount % 2) != 0U)
        -- pairsEnd;

    const uint32_t *const stop = pixels + CAST_SIZE(bufSize);
    while (pixels != stop)
    {
        const unsigned int pixel = *pixels;
        uint8_t *const bytes = reinterpret_cast<uint8_t *>(pixels);

        for (STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
             it != pairsEnd;
             ++ it)
        {
            // "it" walks two entries per iteration: source then replacement.
            const DyeColor &from = *it;
            ++ it;
            const DyeColor &to = *it;

            // Pack the source color the same way the pixel word is laid out
            // in memory for the current byte order.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int packed = (from.value[3] << 24U)
                | (from.value[2] << 16U)
                | (from.value[1] << 8U)
                | (from.value[0]);
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int packed = (from.value[3])
                | (from.value[2] << 8U)
                | (from.value[1] << 16U)
                | (from.value[0] << 24U);
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            if (pixel != packed)
                continue;

            // First match wins: write the replacement bytes and stop.
            bytes[3] = to.value[0];
            bytes[2] = to.value[1];
            bytes[1] = to.value[2];
            bytes[0] = to.value[3];
            break;
        }

        ++ pixels;
    }
}
89
90
#ifdef SIMD_SUPPORTED
91
/*
92
static void print256(const char *const text, const __m256i &val);
93
static void print256(const char *const text, const __m256i &val)
94
{
95
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
96
}
97
*/
98
99
__attribute__ ((target ("sse2")))
100
11
void DyePalette::replaceAColorSse2(uint32_t *restrict pixels,
101
                                   const int bufSize) const restrict2
102
{
103
22
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
104
22
    const size_t sz = mColors.size();
105
11
    if ((sz == 0U) || (pixels == nullptr))
106
        return;
107
11
    if ((sz % 2) != 0U)
108
        -- it_end;
109
11
    const int mod = bufSize % 4;
110
11
    const int bufEnd = bufSize - mod;
111
112
20
    for (int ptr = 0; ptr < bufEnd; ptr += 4)
113
    {
114
//        __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(pixels));
115
        __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
116
18
            &pixels[ptr]));
117
118
18
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
119
27
        while (it != it_end)
120
        {
121
18
            const DyeColor &col = *it;
122
18
            ++ it;
123
18
            const DyeColor &col2 = *it;
124
125
36
            __m128i newMask = _mm_set1_epi32(col2.valueA);
126
36
            __m128i cmpMask = _mm_set1_epi32(col.valueA);
127
18
            __m128i cmpRes = _mm_cmpeq_epi32(base, cmpMask);
128
18
            __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
129
18
            __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
130
18
            base = _mm_or_si128(srcAnd, dstAnd);
131
132
            ++ it;
133
        }
134
//        _mm_store_si128(reinterpret_cast<__m128i*>(pixels), base);
135
18
        _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
136
    }
137
138
    // complete end without simd
139
41
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
140
    {
141
15
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
142
15
        const unsigned int data = pixels[ptr];
143
144
30
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
145
27
        while (it != it_end)
146
        {
147
23
            const DyeColor &col = *it;
148
23
            ++ it;
149
23
            const DyeColor &col2 = *it;
150
151
46
            const unsigned int coldata = (col.value[3]) |
152
46
                (col.value[2] << 8U) |
153
46
                (col.value[1] << 16U) |
154
46
                (col.value[0] << 24U);
155
156
23
            if (data == coldata)
157
            {
158
11
                p[3] = col2.value[0];
159
11
                p[2] = col2.value[1];
160
11
                p[1] = col2.value[2];
161
11
                p[0] = col2.value[3];
162
11
                break;
163
            }
164
165
            ++ it;
166
        }
167
    }
168
}
169
170
__attribute__ ((target ("avx2")))
171
23
void DyePalette::replaceAColorAvx2(uint32_t *restrict pixels,
172
                                   const int bufSize) const restrict2
173
{
174
46
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
175
46
    const size_t sz = mColors.size();
176
23
    if ((sz == 0U) || (pixels == nullptr))
177
        return;
178
23
    if ((sz % 2) != 0U)
179
        -- it_end;
180
23
    const int mod = bufSize % 8;
181
23
    const int bufEnd = bufSize - mod;
182
183
157
    for (int ptr = 0; ptr < bufEnd; ptr += 8)
184
    {
185
//        __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
186
        __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
187
268
            &pixels[ptr]));
188
189
268
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
190
274
        while (it != it_end)
191
        {
192
140
            const DyeColor &col = *it;
193
140
            ++ it;
194
140
            const DyeColor &col2 = *it;
195
196
280
            __m256i newMask = _mm256_set1_epi32(col2.valueA);
197
280
            __m256i cmpMask = _mm256_set1_epi32(col.valueA);
198
140
            __m256i cmpRes = _mm256_cmpeq_epi32(base, cmpMask);
199
140
            __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
200
140
            __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
201
140
            base = _mm256_or_si256(srcAnd, dstAnd);
202
203
            ++ it;
204
        }
205
//        _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
206
268
        _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
207
    }
208
209
    // complete end without simd
210
131
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
211
    {
212
54
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
213
54
        const unsigned int data = pixels[ptr];
214
215
108
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
216
100
        while (it != it_end)
217
        {
218
84
            const DyeColor &col = *it;
219
84
            ++ it;
220
84
            const DyeColor &col2 = *it;
221
222
168
            const unsigned int coldata = (col.value[3]) |
223
168
                (col.value[2] << 8U) |
224
168
                (col.value[1] << 16U) |
225
168
                (col.value[0] << 24U);
226
227
84
            if (data == coldata)
228
            {
229
38
                p[3] = col2.value[0];
230
38
                p[2] = col2.value[1];
231
38
                p[1] = col2.value[2];
232
38
                p[0] = col2.value[3];
233
38
                break;
234
            }
235
236
            ++ it;
237
        }
238
    }
239
}
240
241
#endif  // SIMD_SUPPORTED