Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / third_party / WebKit / Source / platform / graphics / cpu / arm / WebGLImageConversionNEON.h
1 /*
2  * Copyright (C) 2012 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #ifndef WebGLImageConversionNEON_h
27 #define WebGLImageConversionNEON_h
28
29 #if HAVE(ARM_NEON_INTRINSICS)
30
31 #include <arm_neon.h>
32
33 namespace blink {
34
35 namespace SIMD {
36
// Narrows 16-bit-per-component RGBA data to 8 bits per component by keeping
// only the odd-addressed byte of every component (the most significant byte
// when the 16-bit values are stored little-endian).
//
// Only whole batches of 16 components (4 pixels) are converted. On return
// |source| and |destination| have both been advanced by the number of
// converted components, and |pixelsPerRow| holds the number of pixels that
// were left unconverted.
ALWAYS_INLINE void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned leftoverComponents = totalComponents % 16;
    const unsigned vectorComponents = totalComponents - leftoverComponents;

    const uint8_t* in = reinterpret_cast<const uint8_t*>(source);
    uint8_t* out = destination;

    for (unsigned remaining = vectorComponents; remaining; remaining -= 16) {
        // De-interleave 32 bytes: val[0] receives the even-addressed bytes,
        // val[1] the odd-addressed ones. Only the latter are kept.
        const uint8x16x2_t bytePlanes = vld2q_u8(in);
        vst1q_u8(out, bytePlanes.val[1]);
        in += 32;
        out += 16;
    }

    source += vectorComponents;
    destination += vectorComponents;
    pixelsPerRow = leftoverComponents / 4;
}
53
// Converts 16-bit-per-component RGB data to 8-bit RGBA. Each output component
// is the upper 8 bits of the corresponding 16-bit input component; the alpha
// byte written for every pixel is 0xFF.
//
// Only whole batches of 24 components (8 pixels) are converted. On return
// |source| has been advanced by the number of converted components,
// |destination| by 32 bytes per batch, and |pixelsPerRow| holds the number of
// pixels that were left unconverted.
ALWAYS_INLINE void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned componentsPerRow = pixelsPerRow * 3;
    const unsigned tailComponents = componentsPerRow % 24;
    const unsigned componentsSize = componentsPerRow - tailComponents;

    const uint8x8_t opaqueAlpha = vdup_n_u8(0xFF);
    for (unsigned i = 0; i < componentsSize; i += 24) {
        // val[0..2] hold the R, G and B planes of eight pixels.
        const uint16x8x3_t RGB16 = vld3q_u16(source + i);

        // vshrn_n_u16(x, 8) shifts right by 8 and narrows in a single step.
        // The result is identical to vqmovn_u16(vshrq_n_u16(x, 8)) because a
        // value shifted right by 8 always fits in 8 bits, so the saturating
        // narrow never actually saturates.
        uint8x8x4_t RGBA8;
        RGBA8.val[0] = vshrn_n_u16(RGB16.val[0], 8);
        RGBA8.val[1] = vshrn_n_u16(RGB16.val[1], 8);
        RGBA8.val[2] = vshrn_n_u16(RGB16.val[2], 8);
        RGBA8.val[3] = opaqueAlpha;

        vst4_u8(destination, RGBA8);
        destination += 32;
    }

    source += componentsSize;
    pixelsPerRow = tailComponents / 3;
}
74
// Converts 16-bit-per-component ARGB data to 8-bit RGBA. Each output
// component is the upper 8 bits of the corresponding 16-bit input component,
// and the components are reordered from A,R,G,B to R,G,B,A.
//
// Only whole batches of 32 components (8 pixels) are converted. On return
// |source| and |destination| have both been advanced by the number of
// converted components, and |pixelsPerRow| holds the number of pixels that
// were left unconverted.
ALWAYS_INLINE void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned componentsPerRow = pixelsPerRow * 4;
    const unsigned tailComponents = componentsPerRow % 32;
    const unsigned componentsSize = componentsPerRow - tailComponents;

    for (unsigned i = 0; i < componentsSize; i += 32) {
        // val[0..3] hold the A, R, G and B planes of eight pixels.
        const uint16x8x4_t ARGB16 = vld4q_u16(source + i);

        // vshrn_n_u16(x, 8) shifts right by 8 and narrows in a single step.
        // The result is identical to vqmovn_u16(vshrq_n_u16(x, 8)) because a
        // value shifted right by 8 always fits in 8 bits, so the saturating
        // narrow never actually saturates.
        uint8x8x4_t RGBA8;
        RGBA8.val[0] = vshrn_n_u16(ARGB16.val[1], 8); // R
        RGBA8.val[1] = vshrn_n_u16(ARGB16.val[2], 8); // G
        RGBA8.val[2] = vshrn_n_u16(ARGB16.val[3], 8); // B
        RGBA8.val[3] = vshrn_n_u16(ARGB16.val[0], 8); // A

        // 32 input components produce 32 output bytes, so |i| indexes both.
        vst4_u8(destination + i, RGBA8);
    }

    source += componentsSize;
    destination += componentsSize;
    pixelsPerRow = tailComponents / 4;
}
95
// Converts 16-bit-per-component BGRA data to 8-bit RGBA. Each output
// component is the upper 8 bits of the corresponding 16-bit input component,
// and the components are reordered from B,G,R,A to R,G,B,A.
//
// Only whole batches of 32 components (8 pixels) are converted. On return
// |source| and |destination| have both been advanced by the number of
// converted components, and |pixelsPerRow| holds the number of pixels that
// were left unconverted.
ALWAYS_INLINE void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned componentsPerRow = pixelsPerRow * 4;
    const unsigned tailComponents = componentsPerRow % 32;
    const unsigned componentsSize = componentsPerRow - tailComponents;

    for (unsigned i = 0; i < componentsSize; i += 32) {
        // val[0..3] hold the B, G, R and A planes of eight pixels.
        const uint16x8x4_t BGRA16 = vld4q_u16(source + i);

        // vshrn_n_u16(x, 8) shifts right by 8 and narrows in a single step.
        // The result is identical to vqmovn_u16(vshrq_n_u16(x, 8)) because a
        // value shifted right by 8 always fits in 8 bits, so the saturating
        // narrow never actually saturates.
        uint8x8x4_t RGBA8;
        RGBA8.val[0] = vshrn_n_u16(BGRA16.val[2], 8); // R
        RGBA8.val[1] = vshrn_n_u16(BGRA16.val[1], 8); // G
        RGBA8.val[2] = vshrn_n_u16(BGRA16.val[0], 8); // B
        RGBA8.val[3] = vshrn_n_u16(BGRA16.val[3], 8); // A

        // 32 input components produce 32 output bytes, so |i| indexes both.
        vst4_u8(destination + i, RGBA8);
    }

    source += componentsSize;
    destination += componentsSize;
    pixelsPerRow = tailComponents / 4;
}
116
// Expands packed 16-bit RGBA4444 pixels to 8-bit RGBA. In each 16-bit pixel
// the red nibble is bits 15..12, green 11..8, blue 7..4 and alpha 3..0. Every
// nibble n becomes the byte (n << 4) | n.
//
// Only whole batches of 8 pixels are converted. On return |source| has been
// advanced by the number of converted pixels, |destination| by 32 bytes per
// batch, and |pixelsPerRow| holds the number of pixels left unconverted.
ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned leftoverPixels = pixelsPerRow % 8;
    const unsigned vectorPixels = pixelsPerRow - leftoverPixels;

    const uint16x8_t nibbleMask = vdupq_n_u16(0x0F);
    const uint16_t* in = source;
    uint8_t* out = destination;

    for (unsigned remaining = vectorPixels; remaining; remaining -= 8) {
        const uint16x8_t packed = vld1q_u16(in);

        // Isolate each 4-bit field in the low nibble of a 16-bit lane, then
        // narrow to 8-bit lanes.
        const uint8x8_t redNibble = vqmovn_u16(vshrq_n_u16(packed, 12));
        const uint8x8_t greenNibble = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 8), nibbleMask));
        const uint8x8_t blueNibble = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 4), nibbleMask));
        const uint8x8_t alphaNibble = vqmovn_u16(vandq_u16(packed, nibbleMask));

        // Replicate every nibble into both halves of its byte.
        uint8x8x4_t rgba;
        rgba.val[0] = vorr_u8(vshl_n_u8(redNibble, 4), redNibble);
        rgba.val[1] = vorr_u8(vshl_n_u8(greenNibble, 4), greenNibble);
        rgba.val[2] = vorr_u8(vshl_n_u8(blueNibble, 4), blueNibble);
        rgba.val[3] = vorr_u8(vshl_n_u8(alphaNibble, 4), alphaNibble);

        vst4_u8(out, rgba);
        in += 8;
        out += 32;
    }

    source += vectorPixels;
    destination = out;
    pixelsPerRow = leftoverPixels;
}
144
// Packs 8-bit RGBA pixels into two bytes per pixel, keeping the upper nibble
// of every component. The first byte written for a pixel holds blue (high
// nibble) and alpha (low nibble); the second byte holds red (high nibble) and
// green (low nibble).
//
// Only whole batches of 32 components (8 pixels) are converted. On return
// |source| has been advanced by the number of converted components,
// |destination| by the number of converted pixels, and |pixelsPerRow| holds
// the number of pixels left unconverted.
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned leftoverComponents = totalComponents % 32;
    const unsigned vectorComponents = totalComponents - leftoverComponents;

    const uint8x8_t highNibbleMask = vdup_n_u8(0xF0);
    const uint8_t* in = source;
    uint8_t* out = reinterpret_cast<uint8_t*>(destination);

    for (unsigned remaining = vectorComponents; remaining; remaining -= 32) {
        // val[0..3] hold the R, G, B and A planes of eight pixels.
        const uint8x8x4_t rgba = vld4_u8(in);

        const uint8x8_t redHigh = vand_u8(rgba.val[0], highNibbleMask);
        const uint8x8_t greenLow = vshr_n_u8(vand_u8(rgba.val[1], highNibbleMask), 4);
        const uint8x8_t blueHigh = vand_u8(rgba.val[2], highNibbleMask);
        const uint8x8_t alphaLow = vshr_n_u8(vand_u8(rgba.val[3], highNibbleMask), 4);

        // vst2_u8 interleaves the two planes: val[0] lands in the first byte
        // of each pixel, val[1] in the second.
        uint8x8x2_t packed;
        packed.val[0] = vorr_u8(blueHigh, alphaLow);
        packed.val[1] = vorr_u8(redHigh, greenLow);
        vst2_u8(out, packed);

        in += 32;
        out += 16;
    }

    source += vectorComponents;
    destination += vectorComponents / 4;
    pixelsPerRow = leftoverComponents / 4;
}
172
// Expands packed 16-bit RGBA5551 pixels to 8-bit RGBA. In each 16-bit pixel
// red is bits 15..11, green 10..6, blue 5..1 and alpha bit 0. Every 5-bit
// colour value c becomes the byte (c << 3) | (c & 0x7); alpha becomes 0xFF
// when its bit is set and 0x00 otherwise.
//
// Only whole batches of 8 pixels are converted. On return |source| has been
// advanced by the number of converted pixels, |destination| by 32 bytes per
// batch, and |pixelsPerRow| holds the number of pixels left unconverted.
ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned leftoverPixels = pixelsPerRow % 8;
    const unsigned vectorPixels = pixelsPerRow - leftoverPixels;

    const uint16x8_t fiveBitMask = vdupq_n_u16(0x1F);
    const uint16x8_t oneBitMask = vdupq_n_u16(0x1);
    const uint8x8_t lowThreeBits = vdup_n_u8(0x7);
    const uint8x8_t allOnes = vdup_n_u8(0xFF);

    const uint16_t* in = source;
    uint8_t* out = destination;

    for (unsigned remaining = vectorPixels; remaining; remaining -= 8) {
        const uint16x8_t packed = vld1q_u16(in);

        // Isolate each field in the low bits of a 16-bit lane, then narrow to
        // 8-bit lanes.
        const uint8x8_t red5 = vqmovn_u16(vshrq_n_u16(packed, 11));
        const uint8x8_t green5 = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 6), fiveBitMask));
        const uint8x8_t blue5 = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 1), fiveBitMask));
        const uint8x8_t alpha1 = vqmovn_u16(vandq_u16(packed, oneBitMask));

        uint8x8x4_t rgba;
        rgba.val[0] = vorr_u8(vshl_n_u8(red5, 3), vand_u8(red5, lowThreeBits));
        rgba.val[1] = vorr_u8(vshl_n_u8(green5, 3), vand_u8(green5, lowThreeBits));
        rgba.val[2] = vorr_u8(vshl_n_u8(blue5, 3), vand_u8(blue5, lowThreeBits));
        // alpha1 is 0 or 1, so the product is 0x00 or 0xFF.
        rgba.val[3] = vmul_u8(alpha1, allOnes);

        vst4_u8(out, rgba);
        in += 8;
        out += 32;
    }

    source += vectorPixels;
    destination = out;
    pixelsPerRow = leftoverPixels;
}
204
// Packs 8-bit RGBA pixels into two bytes per pixel using 5 bits each for red,
// green and blue and 1 bit for alpha (the top bits of each component).
// The second byte written for a pixel holds red in bits 7..3 and the top
// three green bits in bits 2..0; the first byte holds the next two green bits
// in bits 7..6, blue in bits 5..1 and alpha in bit 0.
//
// Only whole batches of 32 components (8 pixels) are converted. On return
// |source| has been advanced by the number of converted components,
// |destination| by the number of converted pixels, and |pixelsPerRow| holds
// the number of pixels left unconverted.
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned leftoverComponents = totalComponents % 32;
    const unsigned vectorComponents = totalComponents - leftoverComponents;

    const uint8x8_t topFiveBits = vdup_n_u8(0xF8);
    const uint8x8_t greenMiddleBits = vdup_n_u8(0x18);

    const uint8_t* in = source;
    uint8_t* out = reinterpret_cast<uint8_t*>(destination);

    for (unsigned remaining = vectorComponents; remaining; remaining -= 32) {
        // val[0..3] hold the R, G, B and A planes of eight pixels.
        const uint8x8x4_t rgba = vld4_u8(in);

        const uint8x8_t red = vand_u8(rgba.val[0], topFiveBits);
        const uint8x8_t greenTop3 = vshr_n_u8(rgba.val[1], 5);
        // Green bits 4..3 move up to bits 7..6 of the first byte.
        const uint8x8_t greenNext2 = vshl_n_u8(vand_u8(rgba.val[1], greenMiddleBits), 3);
        const uint8x8_t blue = vshr_n_u8(vand_u8(rgba.val[2], topFiveBits), 2);
        const uint8x8_t alpha = vshr_n_u8(rgba.val[3], 7);

        // vst2_u8 interleaves the two planes: val[0] lands in the first byte
        // of each pixel, val[1] in the second.
        uint8x8x2_t packed;
        packed.val[0] = vorr_u8(vorr_u8(greenNext2, blue), alpha);
        packed.val[1] = vorr_u8(red, greenTop3);
        vst2_u8(out, packed);

        in += 32;
        out += 16;
    }

    source += vectorComponents;
    destination += vectorComponents / 4;
    pixelsPerRow = leftoverComponents / 4;
}
236
// Expands packed 16-bit RGB565 pixels to 8-bit RGBA. In each 16-bit pixel red
// is bits 15..11, green 10..5 and blue 4..0. Red and blue values c become
// (c << 3) | (c & 0x7), green becomes (g << 2) | (g & 0x3), and the alpha
// byte written for every pixel is 0xFF.
//
// Only whole batches of 8 pixels are converted. On return |source| has been
// advanced by the number of converted pixels, |destination| by 32 bytes per
// batch, and |pixelsPerRow| holds the number of pixels left unconverted.
ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned leftoverPixels = pixelsPerRow % 8;
    const unsigned vectorPixels = pixelsPerRow - leftoverPixels;

    const uint16x8_t sixBitMask = vdupq_n_u16(0x3F);
    const uint16x8_t fiveBitMask = vdupq_n_u16(0x1F);
    const uint8x8_t lowTwoBits = vdup_n_u8(0x3);
    const uint8x8_t lowThreeBits = vdup_n_u8(0x7);
    const uint8x8_t opaqueAlpha = vdup_n_u8(0xFF);

    const uint16_t* in = source;
    uint8_t* out = destination;

    for (unsigned remaining = vectorPixels; remaining; remaining -= 8) {
        const uint16x8_t packed = vld1q_u16(in);

        // Isolate each field in the low bits of a 16-bit lane, then narrow to
        // 8-bit lanes.
        const uint8x8_t red5 = vqmovn_u16(vshrq_n_u16(packed, 11));
        const uint8x8_t green6 = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 5), sixBitMask));
        const uint8x8_t blue5 = vqmovn_u16(vandq_u16(packed, fiveBitMask));

        uint8x8x4_t rgba;
        rgba.val[0] = vorr_u8(vshl_n_u8(red5, 3), vand_u8(red5, lowThreeBits));
        rgba.val[1] = vorr_u8(vshl_n_u8(green6, 2), vand_u8(green6, lowTwoBits));
        rgba.val[2] = vorr_u8(vshl_n_u8(blue5, 3), vand_u8(blue5, lowThreeBits));
        rgba.val[3] = opaqueAlpha;

        vst4_u8(out, rgba);
        in += 8;
        out += 32;
    }

    source += vectorPixels;
    destination = out;
    pixelsPerRow = leftoverPixels;
}
268
// Packs 8-bit RGBA pixels into two bytes per pixel using the top 5 bits of
// red, the top 6 bits of green and the top 5 bits of blue; alpha is not used.
// The second byte written for a pixel holds red in bits 7..3 and the top
// three green bits in bits 2..0; the first byte holds the next three green
// bits in bits 7..5 and blue in bits 4..0.
//
// Only whole batches of 32 components (8 pixels) are converted. On return
// |source| has been advanced by the number of converted components,
// |destination| by the number of converted pixels, and |pixelsPerRow| holds
// the number of pixels left unconverted.
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned leftoverComponents = totalComponents % 32;
    const unsigned vectorComponents = totalComponents - leftoverComponents;

    const uint8x8_t topFiveBits = vdup_n_u8(0xF8);
    const uint8x8_t greenMiddleBits = vdup_n_u8(0x1C);

    const uint8_t* in = source;
    uint8_t* out = reinterpret_cast<uint8_t*>(destination);

    for (unsigned remaining = vectorComponents; remaining; remaining -= 32) {
        // val[0..3] hold the R, G, B and A planes of eight pixels.
        const uint8x8x4_t rgba = vld4_u8(in);

        const uint8x8_t red = vand_u8(rgba.val[0], topFiveBits);
        const uint8x8_t greenTop3 = vshr_n_u8(rgba.val[1], 5);
        // Green bits 4..2 move up to bits 7..5 of the first byte.
        const uint8x8_t greenNext3 = vshl_n_u8(vand_u8(rgba.val[1], greenMiddleBits), 3);
        const uint8x8_t blue = vshr_n_u8(vand_u8(rgba.val[2], topFiveBits), 3);

        // vst2_u8 interleaves the two planes: val[0] lands in the first byte
        // of each pixel, val[1] in the second.
        uint8x8x2_t packed;
        packed.val[0] = vorr_u8(greenNext3, blue);
        packed.val[1] = vorr_u8(red, greenTop3);
        vst2_u8(out, packed);

        in += 32;
        out += 16;
    }

    source += vectorComponents;
    destination += vectorComponents / 4;
    pixelsPerRow = leftoverComponents / 4;
}
297
298 } // namespace SIMD
299
300 } // namespace blink
301
302 #endif // HAVE(ARM_NEON_INTRINSICS)
303
304 #endif // WebGLImageConversionNEON_h