From 5eb158d84d0e1f6108e4ded5d2de1be9db0bf46c Mon Sep 17 00:00:00 2001 From: "reed@google.com" Date: Fri, 15 Apr 2011 15:50:34 +0000 Subject: [PATCH] use SkClampRange to speedup clamp iterators speeds up linear gradients 4x git-svn-id: http://skia.googlecode.com/svn/trunk@1139 2bbb7eff-a529-9590-31e7-b0007b416f81 --- src/core/core_files.mk | 1 + src/effects/SkGradientShader.cpp | 116 ++++++++++++++++++++++++++++-- xcode/core/core.xcodeproj/project.pbxproj | 4 ++ 3 files changed, 116 insertions(+), 5 deletions(-) diff --git a/src/core/core_files.mk b/src/core/core_files.mk index b3427b0..0a5f844 100644 --- a/src/core/core_files.mk +++ b/src/core/core_files.mk @@ -22,6 +22,7 @@ SOURCE := \ SkBuffer.cpp \ SkCanvas.cpp \ SkChunkAlloc.cpp \ + SkClampRange.cpp \ SkClipStack.cpp \ SkColor.cpp \ SkColorFilter.cpp \ diff --git a/src/effects/SkGradientShader.cpp b/src/effects/SkGradientShader.cpp index b212dfe..ea90e26 100644 --- a/src/effects/SkGradientShader.cpp +++ b/src/effects/SkGradientShader.cpp @@ -27,6 +27,24 @@ #define USE_DITHER_32BIT_GRADIENT #endif +static void sk_memset32_dither(uint32_t dst[], uint32_t v0, uint32_t v1, + int count) { + if (count > 0) { + if (v0 == v1) { + sk_memset32(dst, v0, count); + } else { + int pairs = count >> 1; + for (int i = 0; i < pairs; i++) { + *dst++ = v0; + *dst++ = v1; + } + if (count & 1) { + *dst = v0; + } + } + } +} + /////////////////////////////////////////////////////////////////////////////// typedef SkFixed (*TileProc)(SkFixed); @@ -794,6 +812,16 @@ static inline bool no_need_for_clamp(int fx, int dx, int count) { return (unsigned)((fx | (fx + (count - 1) * dx)) >> 8) <= 0xFF; } +#include "SkClampRange.h" + +#define NO_CHECK_ITER \ + fi = fx >> 8; \ + SkASSERT(fi <= 0xFF); \ + fx += dx; \ + *dstC++ = cache[toggle + fi]; \ + toggle ^= TOGGLE_MASK + + void Linear_Gradient::shadeSpan(int x, int y, SkPMColor dstC[], int count) { SkASSERT(count > 0); @@ -830,6 +858,7 @@ void Linear_Gradient::shadeSpan(int x, int y, SkPMColor dstC[], int count) { // TODO: dither version sk_memset32(dstC, cache[fi >> (16 - kCache32Bits)], count); } else if (proc == clamp_tileproc) { +#if 0 do { unsigned fi = SkClampMax(fx >> 8, 0xFF); SkASSERT(fi <= 0xFF); @@ -837,6 +866,39 @@ void Linear_Gradient::shadeSpan(int x, int y, SkPMColor dstC[], int count) { *dstC++ = cache[toggle + fi]; toggle ^= TOGGLE_MASK; } while (--count != 0); +#else + SkClampRange range; + range.init(fx, dx, count, 0, 0xFF); + + if ((count = range.fCount0) > 0) { + sk_memset32_dither(dstC, + cache[toggle + range.fV0], + cache[(toggle ^ TOGGLE_MASK) + range.fV0], + count); + dstC += count; + } + if ((count = range.fCount1) > 0) { + unsigned fi; + int i, unroll = count >> 3; + for (i = 0; i < unroll; i++) { + NO_CHECK_ITER; NO_CHECK_ITER; + NO_CHECK_ITER; NO_CHECK_ITER; + NO_CHECK_ITER; NO_CHECK_ITER; + NO_CHECK_ITER; NO_CHECK_ITER; + } + if ((count &= 7) > 0) { + do { + NO_CHECK_ITER; + } while (--count != 0); + } + } + if ((count = range.fCount2) > 0) { + sk_memset32_dither(dstC, + cache[toggle + range.fV1], + cache[(toggle ^ TOGGLE_MASK) + range.fV1], + count); + } +#endif } else if (proc == mirror_tileproc) { do { unsigned fi = mirror_8bits(fx >> 8); @@ -911,6 +973,14 @@ static void dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, } } +#define NO_CHECK_ITER_16 \ + fi = fx >> kCache16Shift; \ + SkASSERT(fi <= kCache16Mask); \ + fx += dx; \ + *dstC++ = cache[toggle + fi]; \ + toggle ^= TOGGLE_MASK + + void Linear_Gradient::shadeSpan16(int x, int y, uint16_t dstC[], int count) { SkASSERT(count > 0); @@ -919,6 +989,7 @@ void Linear_Gradient::shadeSpan16(int x, int y, uint16_t dstC[], int count) { TileProc proc = fTileProc; const uint16_t* cache = this->getCache16(); int toggle = ((x ^ y) & 1) << kCache16Bits; + const int TOGGLE_MASK = (1 << kCache32Bits); if (fDstToIndexClass != kPerspective_MatrixClass) { dstProc(fDstToIndex, SkIntToScalar(x) + SK_ScalarHalf, @@ -938,22 +1009,57 @@ void Linear_Gradient::shadeSpan16(int x, int y, uint16_t dstC[], int count) { // we're a vertical gradient, so no change in a span unsigned fi = proc(fx) >> kCache16Shift; SkASSERT(fi <= kCache16Mask); - dither_memset16(dstC, cache[toggle + fi], cache[(toggle ^ (1 << kCache16Bits)) + fi], count); + dither_memset16(dstC, cache[toggle + fi], + cache[(toggle ^ TOGGLE_MASK) + fi], count); } else if (proc == clamp_tileproc) { +#if 0 do { unsigned fi = SkClampMax(fx >> kCache16Shift, kCache16Mask); SkASSERT(fi <= kCache16Mask); fx += dx; *dstC++ = cache[toggle + fi]; - toggle ^= (1 << kCache16Bits); + toggle ^= TOGGLE_MASK; } while (--count != 0); +#else + SkClampRange range; + range.init(fx, dx, count, 0, kCache16Mask); + + if ((count = range.fCount0) > 0) { + dither_memset16(dstC, + cache[toggle + range.fV0], + cache[(toggle ^ TOGGLE_MASK) + range.fV0], + count); + dstC += count; + } + if ((count = range.fCount1) > 0) { + unsigned fi; + int i, unroll = count >> 3; + for (i = 0; i < unroll; i++) { + NO_CHECK_ITER_16; NO_CHECK_ITER_16; + NO_CHECK_ITER_16; NO_CHECK_ITER_16; + NO_CHECK_ITER_16; NO_CHECK_ITER_16; + NO_CHECK_ITER_16; NO_CHECK_ITER_16; + } + if ((count &= 7) > 0) { + do { + NO_CHECK_ITER_16; + } while (--count != 0); + } + } + if ((count = range.fCount2) > 0) { + dither_memset16(dstC, + cache[toggle + range.fV1], + cache[(toggle ^ TOGGLE_MASK) + range.fV1], + count); + } +#endif } else if (proc == mirror_tileproc) { do { unsigned fi = mirror_bits(fx >> kCache16Shift, kCache16Bits); SkASSERT(fi <= kCache16Mask); fx += dx; *dstC++ = cache[toggle + fi]; - toggle ^= (1 << kCache16Bits); + toggle ^= TOGGLE_MASK; } while (--count != 0); } else { SkASSERT(proc == repeat_tileproc); @@ -962,7 +1068,7 @@ void Linear_Gradient::shadeSpan16(int x, int y, uint16_t dstC[], int count) { SkASSERT(fi <= kCache16Mask); fx += dx; *dstC++ = cache[toggle + fi]; - toggle ^= (1 << kCache16Bits); + toggle ^= TOGGLE_MASK; } while (--count != 0); } } else { @@ -975,7 +1081,7 @@ void Linear_Gradient::shadeSpan16(int x, int y, uint16_t dstC[], int count) { int index = fi >> kCache16Shift; *dstC++ = cache[toggle + index]; - toggle ^= (1 << kCache16Bits); + toggle ^= TOGGLE_MASK; dstX += SK_Scalar1; } while (--count != 0); diff --git a/xcode/core/core.xcodeproj/project.pbxproj b/xcode/core/core.xcodeproj/project.pbxproj index 1b3feb9..cf0813c 100644 --- a/xcode/core/core.xcodeproj/project.pbxproj +++ b/xcode/core/core.xcodeproj/project.pbxproj @@ -11,6 +11,7 @@ 00244E10106A6DEA00B8F4D8 /* SkBlitRow_D32.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00244E0F106A6DEA00B8F4D8 /* SkBlitRow_D32.cpp */; }; 002884C80EFAB8B90083E387 /* SkMMapStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 002884C70EFAB8B90083E387 /* SkMMapStream.cpp */; }; 002884D50EFAB8F80083E387 /* SkStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 002884D40EFAB8F80083E387 /* SkStream.cpp */; }; + 0044DBAF13589C59004DFFAC /* SkClampRange.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0044DBAE13589C59004DFFAC /* SkClampRange.cpp */; }; 005DC79910179ACD00F00DFB /* SkBlitter_ARGB32_Subpixel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 005DC79810179ACD00F00DFB /* SkBlitter_ARGB32_Subpixel.cpp */; }; 005DC79B10179AE000F00DFB /* SkFontHost.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 005DC79A10179AE000F00DFB /* SkFontHost.cpp */; }; 005F256F0EF94F7900582A90 /* ARGB32_Clamp_Bilinear_BitmapShader.h in Headers */ = {isa = PBXBuildFile; fileRef = 005F24F60EF94F7900582A90 /* ARGB32_Clamp_Bilinear_BitmapShader.h */; }; @@ -147,6 +148,7 @@ 002884C70EFAB8B90083E387 /* SkMMapStream.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkMMapStream.cpp; path = ../../src/core/SkMMapStream.cpp; sourceTree = SOURCE_ROOT; }; 002884D40EFAB8F80083E387 /* SkStream.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkStream.cpp; path = ../../src/core/SkStream.cpp; sourceTree = SOURCE_ROOT; }; 002B342710213340000F04C6 /* SkBlitRow_opts_none.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkBlitRow_opts_none.cpp; path = ../../src/opts/SkBlitRow_opts_none.cpp; sourceTree = SOURCE_ROOT; }; + 0044DBAE13589C59004DFFAC /* SkClampRange.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkClampRange.cpp; path = ../../src/core/SkClampRange.cpp; sourceTree = SOURCE_ROOT; }; 00554E0C102733D300C9C8ED /* SkBitmapProcState_opts_none.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkBitmapProcState_opts_none.cpp; path = ../../src/opts/SkBitmapProcState_opts_none.cpp; sourceTree = SOURCE_ROOT; }; 005DC79810179ACD00F00DFB /* SkBlitter_ARGB32_Subpixel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkBlitter_ARGB32_Subpixel.cpp; path = ../../src/core/SkBlitter_ARGB32_Subpixel.cpp; sourceTree = SOURCE_ROOT; }; 005DC79A10179AE000F00DFB /* SkFontHost.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SkFontHost.cpp; path = ../../src/core/SkFontHost.cpp; sourceTree = SOURCE_ROOT; }; @@ -344,6 +346,7 @@ 08FB7795FE84155DC02AAC07 /* src */ = { isa = PBXGroup; children = ( + 0044DBAE13589C59004DFFAC /* SkClampRange.cpp */, 00E0627113439F5D0021A1A5 /* SkMetaData.cpp */, 0009B175131441CD00C52F70 /* SkClipStack.cpp */, 006EB61312EF97E100686979 /* SkRefDict.cpp */, @@ -681,6 +684,7 @@ 006EB61412EF97E100686979 /* SkRefDict.cpp in Sources */, 0009B176131441CD00C52F70 /* SkClipStack.cpp in Sources */, 00E0627213439F5D0021A1A5 /* SkMetaData.cpp in Sources */, + 0044DBAF13589C59004DFFAC /* SkClampRange.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; -- 2.7.4