GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replacescolor.cpp Lines: 99 99 100.0 %
Date: 2018-06-18 21:15:20 Branches: 32 38 84.2 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2018  The ManaPlus Developers
6
 *
7
 *  This file is part of The ManaPlus Client.
8
 *
9
 *  This program is free software; you can redistribute it and/or modify
10
 *  it under the terms of the GNU General Public License as published by
11
 *  the Free Software Foundation; either version 2 of the License, or
12
 *  any later version.
13
 *
14
 *  This program is distributed in the hope that it will be useful,
15
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 *  GNU General Public License for more details.
18
 *
19
 *  You should have received a copy of the GNU General Public License
20
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#include "resources/dye/dyepalette.h"
24
25
PRAGMA48(GCC diagnostic push)
26
PRAGMA48(GCC diagnostic ignored "-Wshadow")
27
#ifndef SDL_BIG_ENDIAN
28
#include <SDL_endian.h>
29
#endif  // SDL_BIG_ENDIAN
30
PRAGMA48(GCC diagnostic pop)
31
32
#ifdef SIMD_SUPPORTED
33
// avx2
34
#include <immintrin.h>
35
#endif  // SIMD_SUPPORTED
36
37
#include "debug.h"
38
39
22
/**
 * Replaces pixel colors according to the palette, one pixel at a time.
 *
 * mColors is read as consecutive (source, replacement) pairs; when the
 * number of entries is odd the last, unpaired entry is ignored. For every
 * pixel the three color bytes are compared with each source entry in order
 * and, on the first match, bytes 1..3 of the pixel are overwritten with the
 * replacement color. Byte 0 of the pixel (masked out before the comparison)
 * is never modified.
 *
 * @param pixels  buffer of 32 bit pixels, modified in place. A null pointer
 *                makes the call a no-op.
 * @param bufSize number of pixels in the buffer. Zero or negative values
 *                make the call a no-op.
 */
void DyePalette::replaceSColorDefault(uint32_t *restrict pixels,
                                      const int bufSize) const restrict2
{
    const size_t sz = mColors.size();
    // A negative size would otherwise produce an end pointer that the
    // "!=" loop below can never reach without running out of bounds.
    if (sz == 0u || pixels == nullptr || bufSize <= 0)
        return;

    // Entries are consumed two at a time; drop a trailing unpaired entry.
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
    if ((sz % 2) != 0u)
        -- it_end;

    const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
    for (uint32_t *px = pixels; px != p_end; ++ px)
    {
        uint8_t *const p = reinterpret_cast<uint8_t *>(px);
        // Keep only the color bytes of the pixel for the comparison.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
        const unsigned int data = (*px) & 0x00ffffff;
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

        const unsigned int data = (*px) & 0xffffff00;
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
        while (it != it_end)
        {
            const DyeColor &col = *it;
            ++ it;
            const DyeColor &col2 = *it;
            ++ it;

            // Pack the source color the same way the masked pixel is laid out.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int coldata = (col.value[2] << 16U)
                | (col.value[1] << 8U) | (col.value[0]);
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            const unsigned int coldata = (col.value[2] << 8U)
                | (col.value[1] << 16U) | (col.value[0] << 24U);
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            if (data == coldata)
            {
                // First match wins: write the replacement and stop searching.
                p[3] = col2.value[0];
                p[2] = col2.value[1];
                p[1] = col2.value[2];
                break;
            }
        }
    }
}
89
90
#ifdef SIMD_SUPPORTED
91
/*
92
static void print256(const char *const text, const __m256i &val);
93
static void print256(const char *const text, const __m256i &val)
94
{
95
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
96
}
97
*/
98
99
__attribute__ ((target ("sse2")))
100
22
void DyePalette::replaceSColorSse2(uint32_t *restrict pixels,
101
                                   const int bufSize) const restrict2
102
{
103
44
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
104
44
    const size_t sz = mColors.size();
105
22
    if (sz == 0u || pixels == nullptr)
106
        return;
107
22
    if ((sz % 2) != 0u)
108
        -- it_end;
109
22
    const int mod = bufSize % 8;
110
22
    const int bufEnd = bufSize - mod;
111
112
34
    for (int ptr = 0; ptr < bufEnd; ptr += 4)
113
    {
114
12
        __m128i mask = _mm_set1_epi32(0xffffff00);
115
//        __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(pixels));
116
        __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
117
24
            &pixels[ptr]));
118
119
24
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
120
36
        while (it != it_end)
121
        {
122
24
            const DyeColor &col = *it;
123
24
            ++ it;
124
24
            const DyeColor &col2 = *it;
125
126
24
            __m128i base2 = _mm_and_si128(mask, base);
127
48
            __m128i newMask = _mm_set1_epi32(col2.valueS);
128
48
            __m128i cmpMask = _mm_set1_epi32(col.valueS);
129
24
            __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
130
24
            cmpRes = _mm_and_si128(mask, cmpRes);
131
24
            __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
132
24
            __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
133
24
            base = _mm_or_si128(srcAnd, dstAnd);
134
            ++ it;
135
        }
136
//        _mm_store_si128(reinterpret_cast<__m128i*>(pixels), base);
137
24
        _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
138
    }
139
140
    // complete end without simd
141
130
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
142
    {
143
54
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
144
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
145
        const unsigned int data = pixels[ptr] & 0x00ffffff;
146
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
147
148
54
        const unsigned int data = pixels[ptr] & 0xffffff00;
149
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
150
151
108
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
152
112
        while (it != it_end)
153
        {
154
102
            const DyeColor &col = *it;
155
102
            ++ it;
156
102
            const DyeColor &col2 = *it;
157
158
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
159
            const unsigned int coldata = (col.value[2] << 16U)
160
                | (col.value[1] << 8U) | (col.value[0]);
161
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
162
163
204
            const unsigned int coldata = (col.value[2] << 8U)
164
102
                | (col.value[1] << 16U) | (col.value[0] << 24U);
165
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
166
167
102
            if (data == coldata)
168
            {
169
44
                p[3] = col2.value[0];
170
44
                p[2] = col2.value[1];
171
44
                p[1] = col2.value[2];
172
44
                break;
173
            }
174
175
            ++ it;
176
        }
177
    }
178
}
179
180
__attribute__ ((target ("avx2")))
181
50
void DyePalette::replaceSColorAvx2(uint32_t *restrict pixels,
182
                                   const int bufSize) const restrict2
183
{
184
100
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
185
100
    const size_t sz = mColors.size();
186
50
    if (sz == 0u || pixels == nullptr)
187
        return;
188
50
    if ((sz % 2) != 0u)
189
        -- it_end;
190
50
    const int mod = bufSize % 8;
191
50
    const int bufEnd = bufSize - mod;
192
193
830
    for (int ptr = 0; ptr < bufEnd; ptr += 8)
194
    {
195
780
        __m256i mask = _mm256_set1_epi32(0xffffff00);
196
//        __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
197
        __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
198
1560
            &pixels[ptr]));
199
200
1560
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
201
4132
        while (it != it_end)
202
        {
203
3352
            const DyeColor &col = *it;
204
3352
            ++ it;
205
3352
            const DyeColor &col2 = *it;
206
207
3352
            __m256i base2 = _mm256_and_si256(mask, base);
208
6704
            __m256i newMask = _mm256_set1_epi32(col2.valueS);
209
6704
            __m256i cmpMask = _mm256_set1_epi32(col.valueS);
210
3352
            __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
211
3352
            cmpRes = _mm256_and_si256(mask, cmpRes);
212
3352
            __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
213
3352
            __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
214
3352
            base = _mm256_or_si256(srcAnd, dstAnd);
215
            ++ it;
216
        }
217
//        _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
218
1560
        _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
219
    }
220
221
    // complete end without simd
222
266
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
223
    {
224
108
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
225
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
226
        const unsigned int data = pixels[ptr] & 0x00ffffff;
227
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
228
229
108
        const unsigned int data = pixels[ptr] & 0xffffff00;
230
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
231
232
216
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
233
224
        while (it != it_end)
234
        {
235
204
            const DyeColor &col = *it;
236
204
            ++ it;
237
204
            const DyeColor &col2 = *it;
238
239
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
240
            const unsigned int coldata = (col.value[2] << 16U)
241
                | (col.value[1] << 8U) | (col.value[0]);
242
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
243
244
408
            const unsigned int coldata = (col.value[2] << 8U)
245
204
                | (col.value[1] << 16U) | (col.value[0] << 24U);
246
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN
247
248
204
            if (data == coldata)
249
            {
250
88
                p[3] = col2.value[0];
251
88
                p[2] = col2.value[1];
252
88
                p[1] = col2.value[2];
253
88
                break;
254
            }
255
256
            ++ it;
257
        }
258
    }
259
}
260
261
#endif  // SIMD_SUPPORTED