From 981d4798007b91e2e19c13b171583927a56df63b Mon Sep 17 00:00:00 2001 From: "reed@google.com" Date: Wed, 9 Mar 2011 12:55:47 +0000 Subject: [PATCH] http://codereview.appspot.com/3980041/ Add blitmask procs (with optional platform acceleration) patch by yaojie.yan git-svn-id: http://skia.googlecode.com/svn/trunk@910 2bbb7eff-a529-9590-31e7-b0007b416f81 --- bench/RectBench.cpp | 110 ++++++++++++++++++++++++++++++++++++ include/core/SkBlitRow.h | 35 ++++++++++-- src/core/SkBlitRow_D32.cpp | 42 +++++++++++++- src/core/SkBlitter_ARGB32.cpp | 18 ++---- src/core/SkCoreBlitters.h | 21 +++---- src/opts/SkBlitRow_opts_SSE2.cpp | 118 +++++++++++++++++++++++++++++++++++---- src/opts/SkBlitRow_opts_SSE2.h | 4 ++ src/opts/SkBlitRow_opts_arm.cpp | 7 +++ src/opts/SkBlitRow_opts_none.cpp | 7 +++ src/opts/opts_check_SSE2.cpp | 37 +++++++++--- 10 files changed, 351 insertions(+), 48 deletions(-) diff --git a/bench/RectBench.cpp b/bench/RectBench.cpp index 0c3eb56..a6f300e 100644 --- a/bench/RectBench.cpp +++ b/bench/RectBench.cpp @@ -3,6 +3,7 @@ #include "SkPaint.h" #include "SkRandom.h" #include "SkString.h" +#include "SkShader.h" class RectBench : public SkBenchmark { public: @@ -112,6 +113,85 @@ protected: virtual const char* onGetName() { return fName; } }; +/******************************************************************************* + * to bench BlitMask [Opaque, Black, color, shader] + *******************************************************************************/ + +class BlitMaskBench : public RectBench { +public: + enum kMaskType { + kMaskOpaque = 0, + kMaskBlack, + kMaskColor, + KMaskShader + }; + SkCanvas::PointMode fMode; + const char* fName; + + BlitMaskBench(void* param, SkCanvas::PointMode mode, + BlitMaskBench::kMaskType type, const char* name) : + RectBench(param, 2), fMode(mode), _type(type) { + fName = name; + } + +protected: + virtual void onDraw(SkCanvas* canvas) { + SkScalar gSizes[] = { + SkIntToScalar(13), SkIntToScalar(24) + }; + size_t sizes = SK_ARRAY_COUNT(gSizes); + + if (this->hasStrokeWidth()) { + gSizes[0] = this->getStrokeWidth(); + sizes = 1; + } + SkRandom rand; + SkColor color = 0xFF000000; + U8CPU alpha = 0xFF; + SkPaint paint; + paint.setStrokeCap(SkPaint::kRound_Cap); + if (_type == KMaskShader) { + SkBitmap srcBM; + srcBM.setConfig(SkBitmap::kARGB_8888_Config, 10, 1); + srcBM.allocPixels(); + srcBM.eraseColor(0xFF00FF00); + + SkShader* s; + s = SkShader::CreateBitmapShader(srcBM, SkShader::kClamp_TileMode, + SkShader::kClamp_TileMode); + paint.setShader(s)->unref(); + } + for (size_t i = 0; i < sizes; i++) { + switch (_type) { + case kMaskOpaque: + color = fColors[i]; + alpha = 0xFF; + break; + case kMaskBlack: + alpha = 0xFF; + color = 0xFF000000; + break; + case kMaskColor: + color = fColors[i]; + alpha = rand.nextU() & 255; + break; + case KMaskShader: + break; + } + paint.setStrokeWidth(gSizes[i]); + this->setupPaint(&paint); + paint.setColor(color); + paint.setAlpha(alpha); + canvas->drawPoints(fMode, N * 2, SkTCast(fRects), paint); + } + } + virtual const char* onGetName() { return fName; } +private: + typedef RectBench INHERITED; + kMaskType _type; +}; + + static SkBenchmark* RectFactory1(void* p) { return SkNEW_ARGS(RectBench, (p, 1)); } static SkBenchmark* RectFactory2(void* p) { return SkNEW_ARGS(RectBench, (p, 3)); } static SkBenchmark* OvalFactory1(void* p) { return SkNEW_ARGS(OvalBench, (p, 1)); } @@ -128,6 +208,32 @@ static SkBenchmark* PolygonFactory(void* p) { return SkNEW_ARGS(PointsBench, (p, SkCanvas::kPolygon_PointMode, "polygon")); } +/* init the blitmask bench + */ +static SkBenchmark* BlitMaskOpaqueFactory(void* p) { + return SkNEW_ARGS(BlitMaskBench, + (p, SkCanvas::kPoints_PointMode, + BlitMaskBench::kMaskOpaque, "maskopaque") + ); +} +static SkBenchmark* BlitMaskBlackFactory(void* p) { + return SkNEW_ARGS(BlitMaskBench, + (p, SkCanvas::kPoints_PointMode, + BlitMaskBench::kMaskBlack, "maskblack") + ); +} +static SkBenchmark* BlitMaskColorFactory(void* p) { + return SkNEW_ARGS(BlitMaskBench, + (p, SkCanvas::kPoints_PointMode, + BlitMaskBench::kMaskColor, "maskcolor") + ); +} +static SkBenchmark* BlitMaskShaderFactory(void* p) { + return SkNEW_ARGS(BlitMaskBench, + (p, SkCanvas::kPoints_PointMode, + BlitMaskBench::KMaskShader, "maskshader") + ); +} static BenchRegistry gRectReg1(RectFactory1); static BenchRegistry gRectReg2(RectFactory2); static BenchRegistry gOvalReg1(OvalFactory1); @@ -137,3 +243,7 @@ static BenchRegistry gRRectReg2(RRectFactory2); static BenchRegistry gPointsReg(PointsFactory); static BenchRegistry gLinesReg(LinesFactory); static BenchRegistry gPolygonReg(PolygonFactory); +static BenchRegistry gRectRegOpaque(BlitMaskOpaqueFactory); +static BenchRegistry gRectRegBlack(BlitMaskBlackFactory); +static BenchRegistry gRectRegColor(BlitMaskColorFactory); +static BenchRegistry gRectRegShader(BlitMaskShaderFactory); diff --git a/include/core/SkBlitRow.h b/include/core/SkBlitRow.h index 2b652c2..bb8cbc2 100644 --- a/include/core/SkBlitRow.h +++ b/include/core/SkBlitRow.h @@ -18,9 +18,9 @@ public: /** Function pointer that reads a scanline of src SkPMColors, and writes a corresponding scanline of 16bit colors (specific format based on the config passed to the Factory. - + The x,y params are useful just for dithering - + @param alpha A global alpha to be applied to all of the src colors @param x The x coordinate of the beginning of the scanline @param y THe y coordinate of the scanline @@ -29,7 +29,7 @@ public: const SkPMColor* SK_RESTRICT src, int count, U8CPU alpha, int x, int y); - /** Function pointer that blends a single color with a row of 32-bit colors + /** Function pointer that blends a single color with a row of 32-bit colors onto a 32-bit destination */ typedef void (*ColorProc)(SkPMColor* dst, const SkPMColor* src, int count, @@ -56,12 +56,12 @@ public: int count, U8CPU alpha); static Proc32 Factory32(unsigned flags32); - + /** Blend a single color onto a row of S32 pixels, writing the result into a row of D32 pixels. src and dst may be the same memory, but if they are not, they may not overlap. */ - static void Color32(SkPMColor dst[], const SkPMColor src[], + static void Color32(SkPMColor dst[], const SkPMColor src[], int count, SkPMColor color); static ColorProc ColorProcFactory(); @@ -84,4 +84,29 @@ private: }; }; +/** + * Factory for blitmask procs + */ +class SkBlitMask { +public: + /** + * Function pointer that blits the mask into a device (dst) colorized + * by color. The number of pixels to blit is specified by width and height, + * but each scanline is offset by dstRB (rowbytes) and srcRB respectively. + */ + typedef void (*Proc)(void* dst, size_t dstRB, SkBitmap::Config dstConfig, + const uint8_t* mask, size_t maskRB, SkColor color, + int width, int height); + + /* Public entry-point to return a blitmask function ptr + */ + static Proc Factory(SkBitmap::Config dstConfig, SkColor color); + + /* return either platform specific optimized blitmask function-ptr, + * or NULL if no optimized + */ + static Proc PlatformProcs(SkBitmap::Config dstConfig, SkColor color); +}; + + #endif diff --git a/src/core/SkBlitRow_D32.cpp b/src/core/SkBlitRow_D32.cpp index f500778..7b9aabc 100644 --- a/src/core/SkBlitRow_D32.cpp +++ b/src/core/SkBlitRow_D32.cpp @@ -128,7 +128,7 @@ SkBlitRow::Proc32 SkBlitRow::Factory32(unsigned flags) { SkASSERT(flags < SK_ARRAY_COUNT(gDefault_Procs32)); // just so we don't crash flags &= kFlags32_Mask; - + SkBlitRow::Proc32 proc = PlatformProcs32(flags); if (NULL == proc) { proc = gDefault_Procs32[flags]; @@ -146,7 +146,7 @@ SkBlitRow::Proc32 SkBlitRow::ColorProcFactory() { return proc; } -void SkBlitRow::Color32(SkPMColor dst[], const SkPMColor src[], +void SkBlitRow::Color32(SkPMColor dst[], const SkPMColor src[], int count, SkPMColor color) { if (count > 0) { if (0 == color) { @@ -168,5 +168,43 @@ void SkBlitRow::Color32(SkPMColor dst[], const SkPMColor src[], } } +/////////////////////////////////////////////////////////////////////////////// + +static void SkARGB32_BlitMask_portable(void* dst, size_t dstRB, + SkBitmap::Config dstConfig, + const uint8_t* mask, + size_t maskRB, SkColor color, + int width, int height) { + size_t dstOffset = dstRB - (width << 2); + size_t maskOffset = maskRB - width; + SkPMColor *device = (SkPMColor *)dst; + do { + int w = width; + do { + unsigned aa = *mask++; + *device = SkBlendARGB32(color, *device, aa); + device += 1; + } while (--w != 0); + device = (uint32_t*)((char*)device + dstOffset); + mask += maskOffset; + } while (--height != 0); +} +SkBlitMask::Proc SkBlitMask::Factory(SkBitmap::Config config, SkColor color) { + SkBlitMask::Proc proc = PlatformProcs(config, color); + if (NULL == proc) { + switch (config) { + case SkBitmap::kARGB_8888_Config: + if ( SK_ColorBLACK != color && 0xFF != SkColorGetA(color) ) { + //TODO: blitmask for black; + //TODO: blitmask for opaque; + proc = SkARGB32_BlitMask_portable; + } + break; + default: + break; + } + } + return proc; +} diff --git a/src/core/SkBlitter_ARGB32.cpp b/src/core/SkBlitter_ARGB32.cpp index a775adb..3cb69a1 100644 --- a/src/core/SkBlitter_ARGB32.cpp +++ b/src/core/SkBlitter_ARGB32.cpp @@ -78,6 +78,9 @@ SkARGB32_Blitter::SkARGB32_Blitter(const SkBitmap& device, const SkPaint& paint) fPMColor = SkPackARGB32(fSrcA, fSrcR, fSrcG, fSrcB); fColor32Proc = SkBlitRow::ColorProcFactory(); + + // init the pro for blitmask + fBlitMaskProc = SkBlitMask::Factory(SkBitmap::kARGB_8888_Config, color); } const SkBitmap* SkARGB32_Blitter::justAnOpaqueColor(uint32_t* value) { @@ -195,19 +198,8 @@ void SkARGB32_Blitter::blitMask(const SkMask& mask, const SkIRect& clip) { uint32_t* device = fDevice.getAddr32(x, y); const uint8_t* alpha = mask.getAddr(x, y); uint32_t srcColor = fPMColor; - unsigned devRB = fDevice.rowBytes() - (width << 2); - unsigned maskRB = mask.fRowBytes - width; - - do { - int w = width; - do { - unsigned aa = *alpha++; - *device = SkBlendARGB32(srcColor, *device, aa); - device += 1; - } while (--w != 0); - device = (uint32_t*)((char*)device + devRB); - alpha += maskRB; - } while (--height != 0); + fBlitMaskProc(device, fDevice.rowBytes(), SkBitmap::kARGB_8888_Config, + alpha, mask.fRowBytes, srcColor, width, height); } void SkARGB32_Opaque_Blitter::blitMask(const SkMask& mask, diff --git a/src/core/SkCoreBlitters.h b/src/core/SkCoreBlitters.h index 32e8035..5a407d6 100644 --- a/src/core/SkCoreBlitters.h +++ b/src/core/SkCoreBlitters.h @@ -2,16 +2,16 @@ ** ** Copyright 2006, The Android Open Source Project ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at ** -** http://www.apache.org/licenses/LICENSE-2.0 +** http://www.apache.org/licenses/LICENSE-2.0 ** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and ** limitations under the License. */ @@ -103,6 +103,7 @@ public: protected: SkColor fPMColor; SkBlitRow::ColorProc fColor32Proc; + SkBlitMask::Proc fBlitMaskProc; private: unsigned fSrcA, fSrcR, fSrcG, fSrcB; @@ -165,7 +166,7 @@ private: // illegal SkA1_Blitter& operator=(const SkA1_Blitter&); - + typedef SkRasterBlitter INHERITED; }; @@ -175,7 +176,7 @@ private: Currently, they make the following assumptions about the state of the paint: - + 1. If there is an xfermode, there will also be a shader 2. If there is a colorfilter, there will be a shader that itself handles calling the filter, so the blitter can always ignore the colorfilter obj diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp index 244dbb4..b5757fc 100644 --- a/src/opts/SkBlitRow_opts_SSE2.cpp +++ b/src/opts/SkBlitRow_opts_SSE2.cpp @@ -2,16 +2,16 @@ ** ** Copyright 2009, The Android Open Source Project ** - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at ** - ** http://www.apache.org/licenses/LICENSE-2.0 + ** http://www.apache.org/licenses/LICENSE-2.0 ** - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and ** limitations under the License. */ @@ -354,7 +354,7 @@ void Color32_SSE2(SkPMColor dst[], const SkPMColor src[], int count, // Get red and blue pixels into lower byte of each word. __m128i src_rb = _mm_and_si128(rb_mask, src_pixel); - + // Get alpha and green into lower byte of each word. __m128i src_ag = _mm_srli_epi16(src_pixel, 8); @@ -387,6 +387,104 @@ void Color32_SSE2(SkPMColor dst[], const SkPMColor src[], int count, src += 1; dst += 1; count--; - } + } } } + +void SkARGB32_BlitMask_SSE2(void* device, size_t dstRB, + SkBitmap::Config dstConfig, const uint8_t* mask, + size_t maskRB, SkColor color, + int width, int height) +{ + size_t dstOffset = dstRB - (width << 2); + size_t maskOffset = maskRB - width; + SkPMColor* dst = (SkPMColor *)device; + do { + int count = width; + if (count >= 4) { + while (((size_t)dst & 0x0F) != 0 && (count > 0)) { + *dst = SkBlendARGB32(color, *dst, *mask); + mask++; + dst++; + count--; + } + __m128i *d = reinterpret_cast<__m128i*>(dst); + __m128i rb_mask = _mm_set1_epi32(0x00FF00FF); + __m128i c_256 = _mm_set1_epi16(256); + __m128i c_1 = _mm_set1_epi16(1); + __m128i src_pixel = _mm_set1_epi32(color); + while (count >= 4) { + // Load 4 pixels each of src and dest. + __m128i dst_pixel = _mm_load_si128(d); + + //set the aphla value + __m128i src_scale_wide = _mm_set_epi8(0, *(mask+3),\ + 0, *(mask+3),0, \ + *(mask+2),0, *(mask+2),\ + 0,*(mask+1), 0,*(mask+1),\ + 0, *mask,0,*mask); + + //call SkAlpha255To256() + src_scale_wide = _mm_add_epi16(src_scale_wide, c_1); + + // Get red and blue pixels into lower byte of each word. + __m128i dst_rb = _mm_and_si128(rb_mask, dst_pixel); + __m128i src_rb = _mm_and_si128(rb_mask, src_pixel); + + // Get alpha and green into lower byte of each word. + __m128i dst_ag = _mm_srli_epi16(dst_pixel, 8); + __m128i src_ag = _mm_srli_epi16(src_pixel, 8); + + // Put per-pixel alpha in low byte of each word. + __m128i dst_alpha = _mm_shufflehi_epi16(src_ag, 0xF5); + dst_alpha = _mm_shufflelo_epi16(dst_alpha, 0xF5); + + // dst_alpha = dst_alpha * src_scale + dst_alpha = _mm_mullo_epi16(dst_alpha, src_scale_wide); + + // Divide by 256. + dst_alpha = _mm_srli_epi16(dst_alpha, 8); + + // Subtract alphas from 256, to get 1..256 + dst_alpha = _mm_sub_epi16(c_256, dst_alpha); + // Multiply red and blue by dst pixel alpha. + dst_rb = _mm_mullo_epi16(dst_rb, dst_alpha); + // Multiply alpha and green by dst pixel alpha. + dst_ag = _mm_mullo_epi16(dst_ag, dst_alpha); + + // Multiply red and blue by global alpha. + src_rb = _mm_mullo_epi16(src_rb, src_scale_wide); + // Multiply alpha and green by global alpha. + src_ag = _mm_mullo_epi16(src_ag, src_scale_wide); + // Divide by 256. + dst_rb = _mm_srli_epi16(dst_rb, 8); + src_rb = _mm_srli_epi16(src_rb, 8); + + // Mask out low bits (goodies already in the right place; no need to divide) + dst_ag = _mm_andnot_si128(rb_mask, dst_ag); + src_ag = _mm_andnot_si128(rb_mask, src_ag); + + // Combine back into RGBA. + dst_pixel = _mm_or_si128(dst_rb, dst_ag); + __m128i tmp_src_pixel = _mm_or_si128(src_rb, src_ag); + + // Add two pixels into result. + __m128i result = _mm_add_epi8(tmp_src_pixel, dst_pixel); + _mm_store_si128(d, result); + // load the next 4 pixel + mask = mask + 4; + d++; + count -= 4; + } + dst = reinterpret_cast(d); + } + while(count > 0) { + *dst= SkBlendARGB32(color, *dst, *mask); + dst += 1; + mask++; + count --; + } + dst = (SkPMColor *)((char*)dst + dstOffset); + mask += maskOffset; + } while (--height != 0); +} diff --git a/src/opts/SkBlitRow_opts_SSE2.h b/src/opts/SkBlitRow_opts_SSE2.h index c22edd8..d861bc5 100644 --- a/src/opts/SkBlitRow_opts_SSE2.h +++ b/src/opts/SkBlitRow_opts_SSE2.h @@ -28,3 +28,7 @@ void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, const SkPMColor* SK_RESTRICT src, int count, U8CPU alpha); +void SkARGB32_BlitMask_SSE2(void* device, size_t dstRB, + SkBitmap::Config dstConfig, const uint8_t* mask, + size_t maskRB, SkColor color, + int width, int height); diff --git a/src/opts/SkBlitRow_opts_arm.cpp b/src/opts/SkBlitRow_opts_arm.cpp index f677b45..30d26ed 100644 --- a/src/opts/SkBlitRow_opts_arm.cpp +++ b/src/opts/SkBlitRow_opts_arm.cpp @@ -1318,3 +1318,10 @@ SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { return NULL; } + + +SkBlitMask::Proc SkBlitMask::PlatformProcs(SkBitmap::Config dstConfig, + SkColor color) +{ + return NULL; +} diff --git a/src/opts/SkBlitRow_opts_none.cpp b/src/opts/SkBlitRow_opts_none.cpp index 0eb1185..0fa098e 100644 --- a/src/opts/SkBlitRow_opts_none.cpp +++ b/src/opts/SkBlitRow_opts_none.cpp @@ -17,3 +17,10 @@ SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { return NULL; } + + +SkBlitMask::Proc SkBlitMask::PlatformProcs(SkBitmap::Config dstConfig, + SkColor color) +{ + return NULL; +} diff --git a/src/opts/opts_check_SSE2.cpp b/src/opts/opts_check_SSE2.cpp index fa7b17a..749117a 100644 --- a/src/opts/opts_check_SSE2.cpp +++ b/src/opts/opts_check_SSE2.cpp @@ -2,16 +2,16 @@ ** ** Copyright 2009, The Android Open Source Project ** - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at ** - ** http://www.apache.org/licenses/LICENSE-2.0 + ** http://www.apache.org/licenses/LICENSE-2.0 ** - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and ** limitations under the License. */ @@ -105,6 +105,27 @@ SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { } } + +SkBlitMask::Proc SkBlitMask::PlatformProcs(SkBitmap::Config dstConfig, + SkColor color) +{ + + SkBlitMask::Proc proc = NULL; + if (hasSSE2()) { + switch (dstConfig) { + case SkBitmap::kARGB_8888_Config: + // TODO: is our current SSE2 faster than the portable, even in + // the case of black or opaque? If so, no need for this check. + if ( SK_ColorBLACK != color && 0xFF != SkColorGetA(color)) + proc = SkARGB32_BlitMask_SSE2; + break; + default: + break; + } + } + return proc; +} + SkMemset16Proc SkMemset16GetPlatformProc() { if (hasSSE2()) { return sk_memset16_SSE2; -- 2.7.4