GCC Code Coverage Report
Directory: src/ Exec Total Coverage
File: src/resources/dye/dyepalette_replaceacolor.cpp Lines: 102 102 100.0 %
Date: 2018-09-20 Branches: 32 38 84.2 %

Line Branch Exec Source
1
/*
2
 *  The ManaPlus Client
3
 *  Copyright (C) 2007-2009  The Mana World Development Team
4
 *  Copyright (C) 2009-2010  The Mana Developers
5
 *  Copyright (C) 2011-2018  The ManaPlus Developers
6
 *
7
 *  This file is part of The ManaPlus Client.
8
 *
9
 *  This program is free software; you can redistribute it and/or modify
10
 *  it under the terms of the GNU General Public License as published by
11
 *  the Free Software Foundation; either version 2 of the License, or
12
 *  any later version.
13
 *
14
 *  This program is distributed in the hope that it will be useful,
15
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 *  GNU General Public License for more details.
18
 *
19
 *  You should have received a copy of the GNU General Public License
20
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#include "resources/dye/dyepalette.h"
24
25
PRAGMA48(GCC diagnostic push)
26
PRAGMA48(GCC diagnostic ignored "-Wshadow")
27
#ifndef SDL_BIG_ENDIAN
28
#include <SDL_endian.h>
29
#endif  // SDL_BIG_ENDIAN
30
PRAGMA48(GCC diagnostic pop)
31
32
#ifdef SIMD_SUPPORTED
33
// avx2
34
#include <immintrin.h>
35
#endif  // SIMD_SUPPORTED
36
37
#include "debug.h"
38
39
11
/**
 * Scalar in-place replacement of whole 32-bit pixels by palette lookup.
 *
 * mColors is read as consecutive (source, replacement) pairs; when it holds
 * an odd number of entries the trailing one is ignored. For every pixel the
 * pairs are scanned in order and the first pair whose source color equals
 * the pixel wins: its replacement bytes are written over the pixel and the
 * scan stops. Pixels that match no source color are left untouched.
 *
 * @param pixels   buffer of packed 32-bit pixels, modified in place.
 *                 Nothing is done when it is nullptr.
 * @param bufSize  number of pixels in the buffer. Zero or negative values
 *                 are treated as an empty buffer.
 */
void DyePalette::replaceAColorDefault(uint32_t *restrict pixels,
                                      const int bufSize) const restrict2
{
    const size_t sz = mColors.size();
    // A negative count must never reach the pointer arithmetic below:
    // converted to an unsigned size it would put p_end far outside the
    // buffer and the "pixels != p_end" loop would run off the end.
    // (CAST_SIZE presumably casts to size_t - confirm against its macro.)
    if ((sz == 0u) || (pixels == nullptr) || (bufSize <= 0))
        return;

    // Only complete pairs are used; drop a dangling last entry.
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
    if ((sz % 2) != 0u)
        -- it_end;

    for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
         pixels != p_end;
         ++pixels)
    {
        uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
        const unsigned int data = *pixels;

        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
        while (it != it_end)
        {
            const DyeColor &col = *it;
            ++ it;
            const DyeColor &col2 = *it;

            // Assemble the source color in the same byte layout that the
            // byte-wise store below produces on this target.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int coldata = (col.value[3] << 24U)
                | (col.value[2] << 16U)
                | (col.value[1] << 8U)
                | (col.value[0]);
#else  // SDL_BYTEORDER == SDL_BIG_ENDIAN
            const unsigned int coldata = (col.value[3])
                | (col.value[2] << 8U)
                | (col.value[1] << 16U) |
                (col.value[0] << 24U);
#endif  // SDL_BYTEORDER == SDL_BIG_ENDIAN

            if (data == coldata)
            {
                // First match wins: write the replacement and stop.
                p[3] = col2.value[0];
                p[2] = col2.value[1];
                p[1] = col2.value[2];
                p[0] = col2.value[3];
                break;
            }

            ++ it;
        }
    }
}
88
89
#ifdef SIMD_SUPPORTED
90
/*
91
static void print256(const char *const text, const __m256i &val);
92
static void print256(const char *const text, const __m256i &val)
93
{
94
    printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
95
}
96
*/
97
98
__attribute__ ((target ("sse2")))
99
11
void DyePalette::replaceAColorSse2(uint32_t *restrict pixels,
100
                                   const int bufSize) const restrict2
101
{
102
22
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
103
22
    const size_t sz = mColors.size();
104
11
    if ((sz == 0u) || (pixels == nullptr))
105
        return;
106
11
    if ((sz % 2) != 0u)
107
        -- it_end;
108
11
    const int mod = bufSize % 4;
109
11
    const int bufEnd = bufSize - mod;
110
111
20
    for (int ptr = 0; ptr < bufEnd; ptr += 4)
112
    {
113
//        __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(pixels));
114
        __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
115
18
            &pixels[ptr]));
116
117
18
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
118
27
        while (it != it_end)
119
        {
120
18
            const DyeColor &col = *it;
121
18
            ++ it;
122
18
            const DyeColor &col2 = *it;
123
124
36
            __m128i newMask = _mm_set1_epi32(col2.valueA);
125
36
            __m128i cmpMask = _mm_set1_epi32(col.valueA);
126
18
            __m128i cmpRes = _mm_cmpeq_epi32(base, cmpMask);
127
18
            __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
128
18
            __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
129
18
            base = _mm_or_si128(srcAnd, dstAnd);
130
131
            ++ it;
132
        }
133
//        _mm_store_si128(reinterpret_cast<__m128i*>(pixels), base);
134
18
        _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
135
    }
136
137
    // complete end without simd
138
41
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
139
    {
140
15
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
141
15
        const unsigned int data = pixels[ptr];
142
143
30
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
144
27
        while (it != it_end)
145
        {
146
23
            const DyeColor &col = *it;
147
23
            ++ it;
148
23
            const DyeColor &col2 = *it;
149
150
46
            const unsigned int coldata = (col.value[3]) |
151
46
                (col.value[2] << 8U) |
152
46
                (col.value[1] << 16U) |
153
46
                (col.value[0] << 24U);
154
155
23
            if (data == coldata)
156
            {
157
11
                p[3] = col2.value[0];
158
11
                p[2] = col2.value[1];
159
11
                p[1] = col2.value[2];
160
11
                p[0] = col2.value[3];
161
11
                break;
162
            }
163
164
            ++ it;
165
        }
166
    }
167
}
168
169
__attribute__ ((target ("avx2")))
170
23
void DyePalette::replaceAColorAvx2(uint32_t *restrict pixels,
171
                                   const int bufSize) const restrict2
172
{
173
46
    STD_VECTOR<DyeColor>::const_iterator it_end = mColors.end();
174
46
    const size_t sz = mColors.size();
175
23
    if ((sz == 0u) || (pixels == nullptr))
176
        return;
177
23
    if ((sz % 2) != 0u)
178
        -- it_end;
179
23
    const int mod = bufSize % 8;
180
23
    const int bufEnd = bufSize - mod;
181
182
157
    for (int ptr = 0; ptr < bufEnd; ptr += 8)
183
    {
184
//        __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
185
        __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
186
268
            &pixels[ptr]));
187
188
268
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
189
274
        while (it != it_end)
190
        {
191
140
            const DyeColor &col = *it;
192
140
            ++ it;
193
140
            const DyeColor &col2 = *it;
194
195
280
            __m256i newMask = _mm256_set1_epi32(col2.valueA);
196
280
            __m256i cmpMask = _mm256_set1_epi32(col.valueA);
197
140
            __m256i cmpRes = _mm256_cmpeq_epi32(base, cmpMask);
198
140
            __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
199
140
            __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
200
140
            base = _mm256_or_si256(srcAnd, dstAnd);
201
202
            ++ it;
203
        }
204
//        _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
205
268
        _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
206
    }
207
208
    // complete end without simd
209
131
    for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
210
    {
211
54
        uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
212
54
        const unsigned int data = pixels[ptr];
213
214
108
        STD_VECTOR<DyeColor>::const_iterator it = mColors.begin();
215
100
        while (it != it_end)
216
        {
217
84
            const DyeColor &col = *it;
218
84
            ++ it;
219
84
            const DyeColor &col2 = *it;
220
221
168
            const unsigned int coldata = (col.value[3]) |
222
168
                (col.value[2] << 8U) |
223
168
                (col.value[1] << 16U) |
224
168
                (col.value[0] << 24U);
225
226
84
            if (data == coldata)
227
            {
228
38
                p[3] = col2.value[0];
229
38
                p[2] = col2.value[1];
230
38
                p[1] = col2.value[2];
231
38
                p[0] = col2.value[3];
232
38
                break;
233
            }
234
235
            ++ it;
236
        }
237
    }
238
}
239
240
#endif  // SIMD_SUPPORTED