Update To 11.40.268.0
[platform/framework/web/crosswalk.git] / src / third_party / skia / src / opts / opts_check_x86.cpp
1 /*
2  * Copyright 2009 The Android Open Source Project
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7
8 #include "SkBitmapFilter_opts_SSE2.h"
9 #include "SkBitmapProcState_opts_SSE2.h"
10 #include "SkBitmapProcState_opts_SSSE3.h"
11 #include "SkBitmapScaler.h"
12 #include "SkBlitMask.h"
13 #include "SkBlitRect_opts_SSE2.h"
14 #include "SkBlitRow.h"
15 #include "SkBlitRow_opts_SSE2.h"
16 #include "SkBlitRow_opts_SSE4.h"
17 #include "SkBlurImage_opts_SSE2.h"
18 #include "SkBlurImage_opts_SSE4.h"
19 #include "SkLazyPtr.h"
20 #include "SkMorphology_opts.h"
21 #include "SkMorphology_opts_SSE2.h"
22 #include "SkRTConf.h"
23 #include "SkUtils.h"
24 #include "SkUtils_opts_SSE2.h"
25 #include "SkXfermode.h"
26 #include "SkXfermode_proccoeff.h"
27
28 #if defined(_MSC_VER) && defined(_WIN64)
29 #include <intrin.h>
30 #endif
31
32 /* This file must *not* be compiled with -msse or any other optional SIMD
33    extension, otherwise gcc may generate SIMD instructions even for scalar ops
34    (and thus give an invalid instruction on Pentium3 on the code below).
35    For example, only files named *_SSE2.cpp in this directory should be
36    compiled with -msse2 or higher. */
37
38
/* Function to get the CPU SSE-level in runtime, for different compilers.
 *
 * getcpuid(info_type, info) executes CPUID with EAX = info_type and stores the
 * resulting EAX, EBX, ECX and EDX in info[0], info[1], info[2] and info[3]
 * respectively.  Exactly one of the definitions below is compiled, depending
 * on the compiler and target.
 */
#ifdef _MSC_VER
static inline void getcpuid(int info_type, int info[4]) {
#if defined(_WIN64)
    // 64-bit MSVC: use the __cpuid intrinsic from <intrin.h>.
    __cpuid(info, info_type);
#else
    // 32-bit MSVC: inline assembly, storing the four result registers
    // through the info pointer.
    __asm {
        mov    eax, [info_type]
        cpuid
        mov    edi, [info]
        mov    [edi], eax
        mov    [edi+4], ebx
        mov    [edi+8], ecx
        mov    [edi+12], edx
    }
#endif
}
#elif defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
    // All four result registers can be bound directly as outputs here.
    asm volatile (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    // We save and restore ebx, so this code can be compatible with -fPIC.
    //
    // The EBX result is copied into operand %1 before ebx is popped.  That
    // operand is pinned to edi ("=D") rather than left as a free "=r": with
    // "=r" a non-PIC build is allowed to pick ebx itself for %1, in which
    // case the trailing popl would overwrite the CPUID result with the saved
    // value and info[1] would be garbage.
    asm volatile (
        "pushl %%ebx      \n\t"
        "cpuid            \n\t"
        "movl %%ebx, %1   \n\t"
        "popl %%ebx       \n\t"
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#endif
77
78 ////////////////////////////////////////////////////////////////////////////////
79
80 /* Fetch the SIMD level directly from the CPU, at run-time.
81  * Only checks the levels needed by the optimizations in this file.
82  */
83 namespace {  // get_SIMD_level() technically must have external linkage, so no static.
84 int* get_SIMD_level() {
85     int cpu_info[4] = { 0, 0, 0, 0 };
86     getcpuid(1, cpu_info);
87
88     int* level = SkNEW(int);
89
90     if ((cpu_info[2] & (1<<20)) != 0) {
91         *level = SK_CPU_SSE_LEVEL_SSE42;
92     } else if ((cpu_info[2] & (1<<19)) != 0) {
93         *level = SK_CPU_SSE_LEVEL_SSE41;
94     } else if ((cpu_info[2] & (1<<9)) != 0) {
95         *level = SK_CPU_SSE_LEVEL_SSSE3;
96     } else if ((cpu_info[3] & (1<<26)) != 0) {
97         *level = SK_CPU_SSE_LEVEL_SSE2;
98     } else {
99         *level = 0;
100     }
101     return level;
102 }
103 } // namespace
104
105 SK_DECLARE_STATIC_LAZY_PTR(int, gSIMDLevel, get_SIMD_level);
106
107 /* Verify that the requested SIMD level is supported in the build.
108  * If not, check if the platform supports it.
109  */
110 static inline bool supports_simd(int minLevel) {
111 #if defined(SK_CPU_SSE_LEVEL)
112     if (minLevel <= SK_CPU_SSE_LEVEL) {
113         return true;
114     } else
115 #endif
116     {
117 #if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
118         /* For the Android framework we should always know at compile time if the device
119          * we are building for supports SSSE3.  The one exception to this rule is on the
120          * emulator where we are compiled without the -mssse3 option (so we have no
121          * SSSE3 procs) but can be run on a host machine that supports SSSE3
122          * instructions. So for that particular case we disable our SSSE3 options.
123          */
124         return false;
125 #else
126         return minLevel <= *gSIMDLevel.get();
127 #endif
128     }
129 }
130
131 ////////////////////////////////////////////////////////////////////////////////
132
133 SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", true, "Use SSE optimized version of high quality image filters");
134
135 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) {
136     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
137         procs->fExtraHorizontalReads = 3;
138         procs->fConvolveVertically = &convolveVertically_SSE2;
139         procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
140         procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
141         procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
142     }
143 }
144
145 ////////////////////////////////////////////////////////////////////////////////
146
147 void SkBitmapProcState::platformProcs() {
148     /* Every optimization in the function requires at least SSE2 */
149     if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
150         return;
151     }
152
153     /* Check fSampleProc32 */
154     if (fSampleProc32 == S32_opaque_D32_filter_DX) {
155         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
156             fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
157         } else {
158             fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
159         }
160     } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
161         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
162             fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
163         }
164     } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
165         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
166             fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
167         } else {
168             fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
169         }
170     } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
171         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
172             fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
173         }
174     }
175
176     /* Check fSampleProc16 */
177     if (fSampleProc16 == S32_D16_filter_DX) {
178         fSampleProc16 = S32_D16_filter_DX_SSE2;
179     }
180
181     /* Check fMatrixProc */
182     if (fMatrixProc == ClampX_ClampY_filter_scale) {
183         fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
184     } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
185         fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
186     } else if (fMatrixProc == ClampX_ClampY_filter_affine) {
187         fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
188     } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
189         fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
190     }
191
192     /* Check fShaderProc32 */
193     if (c_hqfilter_sse) {
194         if (fShaderProc32 == highQualityFilter32) {
195             fShaderProc32 = highQualityFilter_SSE2;
196         }
197     }
198 }
199
200 ////////////////////////////////////////////////////////////////////////////////
201
202 static SkBlitRow::Proc platform_16_procs[] = {
203     S32_D565_Opaque_SSE2,               // S32_D565_Opaque
204     NULL,                               // S32_D565_Blend
205     S32A_D565_Opaque_SSE2,              // S32A_D565_Opaque
206     NULL,                               // S32A_D565_Blend
207     S32_D565_Opaque_Dither_SSE2,        // S32_D565_Opaque_Dither
208     NULL,                               // S32_D565_Blend_Dither
209     S32A_D565_Opaque_Dither_SSE2,       // S32A_D565_Opaque_Dither
210     NULL,                               // S32A_D565_Blend_Dither
211 };
212
213 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
214     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
215         return platform_16_procs[flags];
216     } else {
217         return NULL;
218     }
219 }
220
221 static SkBlitRow::Proc32 platform_32_procs_SSE2[] = {
222     NULL,                               // S32_Opaque,
223     S32_Blend_BlitRow32_SSE2,           // S32_Blend,
224     S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
225     S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
226 };
227
228 #if defined(SK_ATT_ASM_SUPPORTED)
229 static SkBlitRow::Proc32 platform_32_procs_SSE4[] = {
230     NULL,                               // S32_Opaque,
231     S32_Blend_BlitRow32_SSE2,           // S32_Blend,
232     S32A_Opaque_BlitRow32_SSE4_asm,     // S32A_Opaque
233     S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
234 };
235 #endif
236
237 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
238 #if defined(SK_ATT_ASM_SUPPORTED)
239     if (supports_simd(SK_CPU_SSE_LEVEL_SSE41)) {
240         return platform_32_procs_SSE4[flags];
241     } else
242 #endif
243     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
244         return platform_32_procs_SSE2[flags];
245     } else {
246         return NULL;
247     }
248 }
249
250 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
251     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
252         return Color32_SSE2;
253     } else {
254         return NULL;
255     }
256 }
257
258 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
259
260 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
261 /* Return NULL for now, since the optimized path in ColorRect32_SSE2 is disabled.
262     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
263         return ColorRect32_SSE2;
264     } else {
265         return NULL;
266     }
267 */
268     return NULL;
269 }
270
271 ////////////////////////////////////////////////////////////////////////////////
272
273 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkColorType dstCT,
274                                                      SkMask::Format maskFormat,
275                                                      SkColor color) {
276     if (SkMask::kA8_Format != maskFormat) {
277         return NULL;
278     }
279
280     ColorProc proc = NULL;
281     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
282         switch (dstCT) {
283             case kN32_SkColorType:
284                 // The SSE2 version is not (yet) faster for black, so we check
285                 // for that.
286                 if (SK_ColorBLACK != color) {
287                     proc = SkARGB32_A8_BlitMask_SSE2;
288                 }
289                 break;
290             default:
291                 break;
292         }
293     }
294     return proc;
295 }
296
297 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
298     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
299         if (isOpaque) {
300             return SkBlitLCD16OpaqueRow_SSE2;
301         } else {
302             return SkBlitLCD16Row_SSE2;
303         }
304     } else {
305         return NULL;
306     }
307
308 }
309
310 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) {
311     return NULL;
312 }
313
314 ////////////////////////////////////////////////////////////////////////////////
315
316 SkMemset16Proc SkMemset16GetPlatformProc() {
317     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
318         return sk_memset16_SSE2;
319     } else {
320         return NULL;
321     }
322 }
323
324 SkMemset32Proc SkMemset32GetPlatformProc() {
325     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
326         return sk_memset32_SSE2;
327     } else {
328         return NULL;
329     }
330 }
331
332 SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
333     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
334         return sk_memcpy32_SSE2;
335     } else {
336         return NULL;
337     }
338 }
339
340 ////////////////////////////////////////////////////////////////////////////////
341
342 SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
343     if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
344         return NULL;
345     }
346     switch (type) {
347         case kDilateX_SkMorphologyProcType:
348             return SkDilateX_SSE2;
349         case kDilateY_SkMorphologyProcType:
350             return SkDilateY_SSE2;
351         case kErodeX_SkMorphologyProcType:
352             return SkErodeX_SSE2;
353         case kErodeY_SkMorphologyProcType:
354             return SkErodeY_SSE2;
355         default:
356             return NULL;
357     }
358 }
359
360 ////////////////////////////////////////////////////////////////////////////////
361
362 bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX,
363                                SkBoxBlurProc* boxBlurY,
364                                SkBoxBlurProc* boxBlurXY,
365                                SkBoxBlurProc* boxBlurYX) {
366 #ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
367     return false;
368 #else
369     if (supports_simd(SK_CPU_SSE_LEVEL_SSE41)) {
370         return SkBoxBlurGetPlatformProcs_SSE4(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
371     }
372     else if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
373         return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
374     }
375     return false;
376 #endif
377 }
378
379 ////////////////////////////////////////////////////////////////////////////////
380
381 extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
382                                                                 SkXfermode::Mode mode);
383
384 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
385                                                     SkXfermode::Mode mode);
386
387 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
388                                                     SkXfermode::Mode mode) {
389     return NULL;
390 }
391
392 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
393                                                SkXfermode::Mode mode);
394
395 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
396                                                SkXfermode::Mode mode) {
397     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
398         return SkPlatformXfermodeFactory_impl_SSE2(rec, mode);
399     } else {
400         return SkPlatformXfermodeFactory_impl(rec, mode);
401     }
402 }
403
404 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode);
405
406 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) {
407     return NULL;
408 }