Add SSSE3 acceleration for S32_D16_filter_DX
author qiankun.miao <qiankun.miao@intel.com>
Wed, 10 Dec 2014 15:21:35 +0000 (07:21 -0800)
committer Commit bot <commit-bot@chromium.org>
Wed, 10 Dec 2014 15:21:35 +0000 (07:21 -0800)
With this CL, the related nanobench results improve for the 565 config:
         bitmap_BGRA_8888_update_scale_bilerp   76.1us -> 46.7us        0.61x
                bitmap_BGRA_8888_scale_bilerp   78.7us ->   47us        0.6x
bitmap_BGRA_8888_update_volatile_scale_bilerp   82.7us -> 46.9us        0.57x

BUG=skia:

Review URL: https://codereview.chromium.org/788853002

src/opts/SkBitmapProcState_opts_SSSE3.cpp
src/opts/SkBitmapProcState_opts_SSSE3.h
src/opts/opts_check_x86.cpp

index 99bc19216cf68a8fb10295bb108c42bb8e23577e..984a65e5e784997d87ea7c01ef3e839dc7d0ffa4 100644 (file)
@@ -732,6 +732,17 @@ void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
     S32_generic_D32_filter_DXDY_SSSE3<true>(s, xy, count, colors);
 }
 
+void S32_D16_filter_DX_SSSE3(const SkBitmapProcState& s,
+                             const uint32_t* xy,
+                             int count, uint16_t* colors) {  // Writes `count` 16-bit values to `colors`.
+    SkASSERT(254 >= count);  // Same 254 as the SkAutoSTMalloc template argument on the next line.
+    SkAutoSTMalloc<254, uint32_t> colors32(count);  // Scratch buffer for the intermediate 32-bit results.
+    S32_generic_D32_filter_DX_SSSE3<false>(s, xy, count, colors32);  // NOTE(review): <false> presumably selects the non-alpha path -- confirm.
+    for(int i = 0; i < count; i++) {
+        *colors++ = SkPixel32ToPixel16(colors32[i]);  // Convert each 32-bit result to 16 bits and advance the output.
+    }
+}
+
 void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
                                const uint32_t* xy,
                                int count, uint16_t* colors) {
@@ -769,6 +780,12 @@ void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
     sk_throw();
 }
 
+void S32_D16_filter_DX_SSSE3(const SkBitmapProcState& s,
+                             const uint32_t* xy,
+                             int count, uint16_t* colors) {  // Stub with the same signature; ignores all arguments.
+    sk_throw();  // NOTE(review): presumably the variant built when SSSE3 is unavailable -- confirm against the enclosing #if.
+}
+
 void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
                                const uint32_t* xy,
                                int count, uint16_t* colors) {
index 74504d8bc65e81b1db22f9c0c236c660376d14b6..c7a9a899236a07161dfcf57d66ece2793e253934 100644 (file)
@@ -23,6 +23,9 @@ void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
                                    const uint32_t* xy,
                                    int count, uint32_t* colors);
 
+void S32_D16_filter_DX_SSSE3(const SkBitmapProcState& s,
+                             const uint32_t* xy,
+                             int count, uint16_t* colors);  // 16-bit-output DX proc; declared beside the DXDY one below.
 void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
                                const uint32_t* xy,
                                int count, uint16_t* colors);
index 34aae928eb18e07d17669bb3cd3cc4b37c52831d..84041d05b0c8ba32494431f5c3c7ac1d94b9f854 100644 (file)
@@ -176,7 +176,11 @@ void SkBitmapProcState::platformProcs() {
 
     /* Check fSampleProc16 */
     if (fSampleProc16 == S32_D16_filter_DX) {
-        fSampleProc16 = S32_D16_filter_DX_SSE2;
+        if (ssse3) {
+            fSampleProc16 = S32_D16_filter_DX_SSSE3;
+        } else {
+            fSampleProc16 = S32_D16_filter_DX_SSE2;
+        }
     } else if (ssse3 && fSampleProc16 == S32_D16_filter_DXDY) {
         fSampleProc16 = S32_D16_filter_DXDY_SSSE3;
     }