use SkClampRange to speed up clamp iterators
author reed@google.com <reed@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
Fri, 15 Apr 2011 15:50:34 +0000 (15:50 +0000)
committer reed@google.com <reed@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
Fri, 15 Apr 2011 15:50:34 +0000 (15:50 +0000)
speeds up linear gradients 4x

git-svn-id: http://skia.googlecode.com/svn/trunk@1139 2bbb7eff-a529-9590-31e7-b0007b416f81

src/core/core_files.mk
src/effects/SkGradientShader.cpp
xcode/core/core.xcodeproj/project.pbxproj

index b3427b0..0a5f844 100644 (file)
@@ -22,6 +22,7 @@ SOURCE := \
     SkBuffer.cpp \
     SkCanvas.cpp \
     SkChunkAlloc.cpp \
+    SkClampRange.cpp \
     SkClipStack.cpp \
     SkColor.cpp \
     SkColorFilter.cpp \
index b212dfe..ea90e26 100644 (file)
     #define USE_DITHER_32BIT_GRADIENT
 #endif
 
+static void sk_memset32_dither(uint32_t dst[], uint32_t v0, uint32_t v1,
+                               int count) {  // writes dst[0..count) as v0, v1, v0, v1, ... starting with v0
+    if (count > 0) {  // a zero or negative count writes nothing
+        if (v0 == v1) {
+            sk_memset32(dst, v0, count);  // both values equal, so no alternation is needed; presumably a plain 32-bit fill
+        } else {
+            int pairs = count >> 1;  // number of complete (v0, v1) pairs
+            for (int i = 0; i < pairs; i++) {
+                *dst++ = v0;
+                *dst++ = v1;
+            }
+            if (count & 1) {
+                *dst = v0;  // odd count: the last element continues the pattern with v0
+            }
+        }
+    }
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 
 typedef SkFixed (*TileProc)(SkFixed);
@@ -794,6 +812,16 @@ static inline bool no_need_for_clamp(int fx, int dx, int count) {
     return (unsigned)((fx | (fx + (count - 1) * dx)) >> 8) <= 0xFF;
 }
 
+#include "SkClampRange.h"
+
+#define NO_CHECK_ITER               /* one span step without clamping; expects fi, fx, dx, dstC, cache, toggle and TOGGLE_MASK in the expansion scope */ \
+    fi = fx >> 8;                   /* cache index is fx with its low 8 bits dropped; no range clamp is applied */ \
+    SkASSERT(fi <= 0xFF);           /* debug check only: the caller must guarantee the index is already in range */ \
+    fx += dx;                       /* step to the next sample position */ \
+    *dstC++ = cache[toggle + fi];   /* emit one value from the cache half selected by toggle */ \
+    toggle ^= TOGGLE_MASK  /* flip toggle for the next output element */
+
+
 void Linear_Gradient::shadeSpan(int x, int y, SkPMColor dstC[], int count) {
     SkASSERT(count > 0);
 
@@ -830,6 +858,7 @@ void Linear_Gradient::shadeSpan(int x, int y, SkPMColor dstC[], int count) {
             // TODO: dither version
             sk_memset32(dstC, cache[fi >> (16 - kCache32Bits)], count);
         } else if (proc == clamp_tileproc) {
+#if 0
             do {
                 unsigned fi = SkClampMax(fx >> 8, 0xFF);
                 SkASSERT(fi <= 0xFF);
@@ -837,6 +866,39 @@ void Linear_Gradient::shadeSpan(int x, int y, SkPMColor dstC[], int count) {
                 *dstC++ = cache[toggle + fi];
                 toggle ^= TOGGLE_MASK;
             } while (--count != 0);
+#else
+            SkClampRange range;
+            range.init(fx, dx, count, 0, 0xFF);
+
+            if ((count = range.fCount0) > 0) {
+                sk_memset32_dither(dstC,
+                                   cache[toggle + range.fV0],
+                                   cache[(toggle ^ TOGGLE_MASK) + range.fV0],
+                                   count);
+                dstC += count;
+            }
+            if ((count = range.fCount1) > 0) {
+                unsigned fi;
+                int i, unroll = count >> 3;
+                for (i = 0; i < unroll; i++) {
+                    NO_CHECK_ITER;  NO_CHECK_ITER;
+                    NO_CHECK_ITER;  NO_CHECK_ITER;
+                    NO_CHECK_ITER;  NO_CHECK_ITER;
+                    NO_CHECK_ITER;  NO_CHECK_ITER;
+                }
+                if ((count &= 7) > 0) {
+                    do {
+                        NO_CHECK_ITER;
+                    } while (--count != 0);
+                }
+            }
+            if ((count = range.fCount2) > 0) {
+                sk_memset32_dither(dstC,
+                                   cache[toggle + range.fV1],
+                                   cache[(toggle ^ TOGGLE_MASK) + range.fV1],
+                                   count);
+            }
+#endif
         } else if (proc == mirror_tileproc) {
             do {
                 unsigned fi = mirror_8bits(fx >> 8);
@@ -911,6 +973,14 @@ static void dither_memset16(uint16_t dst[], uint16_t value, uint16_t other,
     }
 }
 
+#define NO_CHECK_ITER_16                /* one span step without clamping; expects fi, fx, dx, dstC, cache, toggle and TOGGLE_MASK in the expansion scope */ \
+    fi = fx >> kCache16Shift;           /* cache index is fx shifted down by kCache16Shift; no range clamp is applied */ \
+    SkASSERT(fi <= kCache16Mask);       /* debug check only: the caller must guarantee the index is already in range */ \
+    fx += dx;                           /* step to the next sample position */ \
+    *dstC++ = cache[toggle + fi];       /* emit one value from the cache half selected by toggle */ \
+    toggle ^= TOGGLE_MASK  /* flip toggle for the next output element */
+
+
 void Linear_Gradient::shadeSpan16(int x, int y, uint16_t dstC[], int count) {
     SkASSERT(count > 0);
 
@@ -919,6 +989,7 @@ void Linear_Gradient::shadeSpan16(int x, int y, uint16_t dstC[], int count) {
     TileProc            proc = fTileProc;
     const uint16_t*     cache = this->getCache16();
     int                 toggle = ((x ^ y) & 1) << kCache16Bits;
+    const int           TOGGLE_MASK = (1 << kCache32Bits);
 
     if (fDstToIndexClass != kPerspective_MatrixClass) {
         dstProc(fDstToIndex, SkIntToScalar(x) + SK_ScalarHalf,
@@ -938,22 +1009,57 @@ void Linear_Gradient::shadeSpan16(int x, int y, uint16_t dstC[], int count) {
             // we're a vertical gradient, so no change in a span
             unsigned fi = proc(fx) >> kCache16Shift;
             SkASSERT(fi <= kCache16Mask);
-            dither_memset16(dstC, cache[toggle + fi], cache[(toggle ^ (1 << kCache16Bits)) + fi], count);
+            dither_memset16(dstC, cache[toggle + fi],
+                            cache[(toggle ^ TOGGLE_MASK) + fi], count);
         } else if (proc == clamp_tileproc) {
+#if 0
             do {
                 unsigned fi = SkClampMax(fx >> kCache16Shift, kCache16Mask);
                 SkASSERT(fi <= kCache16Mask);
                 fx += dx;
                 *dstC++ = cache[toggle + fi];
-                toggle ^= (1 << kCache16Bits);
+                toggle ^= TOGGLE_MASK;
             } while (--count != 0);
+#else
+            SkClampRange range;
+            range.init(fx, dx, count, 0, kCache16Mask);
+
+            if ((count = range.fCount0) > 0) {
+                dither_memset16(dstC,
+                                cache[toggle + range.fV0],
+                                cache[(toggle ^ TOGGLE_MASK) + range.fV0],
+                                count);
+                dstC += count;
+            }
+            if ((count = range.fCount1) > 0) {
+                unsigned fi;
+                int i, unroll = count >> 3;
+                for (i = 0; i < unroll; i++) {
+                    NO_CHECK_ITER_16;  NO_CHECK_ITER_16;
+                    NO_CHECK_ITER_16;  NO_CHECK_ITER_16;
+                    NO_CHECK_ITER_16;  NO_CHECK_ITER_16;
+                    NO_CHECK_ITER_16;  NO_CHECK_ITER_16;
+                }
+                if ((count &= 7) > 0) {
+                    do {
+                        NO_CHECK_ITER_16;
+                    } while (--count != 0);
+                }
+            }
+            if ((count = range.fCount2) > 0) {
+                dither_memset16(dstC,
+                                cache[toggle + range.fV1],
+                                cache[(toggle ^ TOGGLE_MASK) + range.fV1],
+                                count);
+            }
+#endif
         } else if (proc == mirror_tileproc) {
             do {
                 unsigned fi = mirror_bits(fx >> kCache16Shift, kCache16Bits);
                 SkASSERT(fi <= kCache16Mask);
                 fx += dx;
                 *dstC++ = cache[toggle + fi];
-                toggle ^= (1 << kCache16Bits);
+                toggle ^= TOGGLE_MASK;
             } while (--count != 0);
         } else {
             SkASSERT(proc == repeat_tileproc);
@@ -962,7 +1068,7 @@ void Linear_Gradient::shadeSpan16(int x, int y, uint16_t dstC[], int count) {
                 SkASSERT(fi <= kCache16Mask);
                 fx += dx;
                 *dstC++ = cache[toggle + fi];
-                toggle ^= (1 << kCache16Bits);
+                toggle ^= TOGGLE_MASK;
             } while (--count != 0);
         }
     } else {
@@ -975,7 +1081,7 @@ void Linear_Gradient::shadeSpan16(int x, int y, uint16_t dstC[], int count) {
 
             int index = fi >> kCache16Shift;
             *dstC++ = cache[toggle + index];
-            toggle ^= (1 << kCache16Bits);
+            toggle ^= TOGGLE_MASK;
 
             dstX += SK_Scalar1;
         } while (--count != 0);
index 1b3feb9..cf0813c 100644 (file)
@@ -11,6 +11,7 @@
                00244E10106A6DEA00B8F4D8 /* SkBlitRow_D32.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00244E0F106A6DEA00B8F4D8 /* SkBlitRow_D32.cpp */; };
                002884C80EFAB8B90083E387 /* SkMMapStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 002884C70EFAB8B90083E387 /* SkMMapStream.cpp */; };
                002884D50EFAB8F80083E387 /* SkStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 002884D40EFAB8F80083E387 /* SkStream.cpp */; };
+               0044DBAF13589C59004DFFAC /* SkClampRange.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0044DBAE13589C59004DFFAC /* SkClampRange.cpp */; };
                005DC79910179ACD00F00DFB /* SkBlitter_ARGB32_Subpixel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 005DC79810179ACD00F00DFB /* SkBlitter_ARGB32_Subpixel.cpp */; };
                005DC79B10179AE000F00DFB /* SkFontHost.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 005DC79A10179AE000F00DFB /* SkFontHost.cpp */; };
                005F256F0EF94F7900582A90 /* ARGB32_Clamp_Bilinear_BitmapShader.h in Headers */ = {isa = PBXBuildFile; fileRef = 005F24F60EF94F7900582A90 /* ARGB32_Clamp_Bilinear_BitmapShader.h */; };
                002884C70EFAB8B90083E387 /* SkMMapStream.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkMMapStream.cpp; path = ../../src/core/SkMMapStream.cpp; sourceTree = SOURCE_ROOT; };
                002884D40EFAB8F80083E387 /* SkStream.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkStream.cpp; path = ../../src/core/SkStream.cpp; sourceTree = SOURCE_ROOT; };
                002B342710213340000F04C6 /* SkBlitRow_opts_none.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkBlitRow_opts_none.cpp; path = ../../src/opts/SkBlitRow_opts_none.cpp; sourceTree = SOURCE_ROOT; };
+               0044DBAE13589C59004DFFAC /* SkClampRange.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkClampRange.cpp; path = ../../src/core/SkClampRange.cpp; sourceTree = SOURCE_ROOT; };
                00554E0C102733D300C9C8ED /* SkBitmapProcState_opts_none.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkBitmapProcState_opts_none.cpp; path = ../../src/opts/SkBitmapProcState_opts_none.cpp; sourceTree = SOURCE_ROOT; };
                005DC79810179ACD00F00DFB /* SkBlitter_ARGB32_Subpixel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkBlitter_ARGB32_Subpixel.cpp; path = ../../src/core/SkBlitter_ARGB32_Subpixel.cpp; sourceTree = SOURCE_ROOT; };
                005DC79A10179AE000F00DFB /* SkFontHost.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkFontHost.cpp; path = ../../src/core/SkFontHost.cpp; sourceTree = SOURCE_ROOT; };
                08FB7795FE84155DC02AAC07 /* src */ = {
                        isa = PBXGroup;
                        children = (
+                               0044DBAE13589C59004DFFAC /* SkClampRange.cpp */,
                                00E0627113439F5D0021A1A5 /* SkMetaData.cpp */,
                                0009B175131441CD00C52F70 /* SkClipStack.cpp */,
                                006EB61312EF97E100686979 /* SkRefDict.cpp */,
                                006EB61412EF97E100686979 /* SkRefDict.cpp in Sources */,
                                0009B176131441CD00C52F70 /* SkClipStack.cpp in Sources */,
                                00E0627213439F5D0021A1A5 /* SkMetaData.cpp in Sources */,
+                               0044DBAF13589C59004DFFAC /* SkClampRange.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };