From: mtklein Date: Fri, 22 Jul 2016 18:05:04 +0000 (-0700) Subject: Add SkRasterPipeline blitter. X-Git-Tag: accepted/tizen/5.0/unified/20181102.025319~116^2~641 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9a5c47f4effba3b48a9a8c7c144b72b532d06efe;p=platform%2Fupstream%2FlibSkiaSharp.git Add SkRasterPipeline blitter. This is now pixel-exact with the existing sRGB SW impl as far as I've tested. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2146413002 CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot,Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Debug-ASAN-Trybot Committed: https://skia.googlesource.com/skia/+/3011e337693a9786f62d8de9ac4b239ad6dbdaca Review-Url: https://codereview.chromium.org/2146413002 --- diff --git a/gyp/core.gypi b/gyp/core.gypi index 2e1c88a..736e47d 100644 --- a/gyp/core.gypi +++ b/gyp/core.gypi @@ -231,6 +231,7 @@ '<(skia_src_path)/core/SkQuadClipper.h', '<(skia_src_path)/core/SkRasterClip.cpp', '<(skia_src_path)/core/SkRasterPipeline.cpp', + '<(skia_src_path)/core/SkRasterPipelineBlitter.cpp', '<(skia_src_path)/core/SkRasterizer.cpp', '<(skia_src_path)/core/SkReadBuffer.h', '<(skia_src_path)/core/SkReadBuffer.cpp', diff --git a/include/core/SkColorFilter.h b/include/core/SkColorFilter.h index 7b2ee78..609550f 100644 --- a/include/core/SkColorFilter.h +++ b/include/core/SkColorFilter.h @@ -16,6 +16,7 @@ class GrContext; class GrFragmentProcessor; class SkBitmap; +class SkRasterPipeline; /** * ColorFilters are optional objects in the drawing pipeline. When present in @@ -70,6 +71,8 @@ public: virtual void filterSpan4f(const SkPM4f src[], int count, SkPM4f result[]) const; + bool appendStages(SkRasterPipeline*) const; + enum Flags { /** If set the filter methods will not change the alpha channel of the colors. */ @@ -168,6 +171,8 @@ public: protected: SkColorFilter() {} + virtual bool onAppendStages(SkRasterPipeline*) const; + private: /* * Returns 1 if this is a single filter (not a composition of other filters), otherwise it diff --git a/include/core/SkXfermode.h b/include/core/SkXfermode.h index 6215315..2d12b3c 100644 --- a/include/core/SkXfermode.h +++ b/include/core/SkXfermode.h @@ -14,6 +14,7 @@ class GrFragmentProcessor; class GrTexture; class GrXPFactory; +class SkRasterPipeline; class SkString; struct SkPM4f; @@ -164,6 +165,8 @@ public: virtual SkXfermodeProc4f getProc4f() const; + bool appendStages(SkRasterPipeline*) const; + /** * If the specified mode can be represented by a pair of Coeff, then return * true and set (if not NULL) the corresponding coeffs. If the mode is @@ -216,15 +219,15 @@ public: #if SK_SUPPORT_GPU /** Used by the SkXfermodeImageFilter to blend two colors via a GrFragmentProcessor. The input to the returned FP is the src color. The dst color is - provided by the dst param which becomes a child FP of the returned FP. + provided by the dst param which becomes a child FP of the returned FP. It is legal for the function to return a null output. This indicates that the output of the blend is simply the src color. */ virtual sk_sp makeFragmentProcessorForImageFilter( sk_sp dst) const; - /** A subclass must implement this factory function to work with the GPU backend. - The xfermode will return a factory for which the caller will get a ref. It is up + /** A subclass must implement this factory function to work with the GPU backend. + The xfermode will return a factory for which the caller will get a ref. It is up to the caller to install it. XferProcessors cannot use a background texture. */ virtual sk_sp asXPFactory() const; @@ -281,6 +284,7 @@ protected: virtual D32Proc onGetD32Proc(uint32_t flags) const; virtual F16Proc onGetF16Proc(uint32_t flags) const; + virtual bool onAppendStages(SkRasterPipeline*) const; private: enum { diff --git a/src/core/SkBlitter.cpp b/src/core/SkBlitter.cpp index f5d13dc..db9fcda 100644 --- a/src/core/SkBlitter.cpp +++ b/src/core/SkBlitter.cpp @@ -853,6 +853,10 @@ SkBlitter* SkBlitter::Choose(const SkPixmap& device, p->setColor(0); } + if (SkBlitter* blitter = SkCreateRasterPipelineBlitter(device, *paint, allocator)) { + return blitter; + } + if (nullptr == shader) { if (mode) { // xfermodes (and filters) require shaders for our current blitters diff --git a/src/core/SkColorFilter.cpp b/src/core/SkColorFilter.cpp index 167a668..31c0ddb 100644 --- a/src/core/SkColorFilter.cpp +++ b/src/core/SkColorFilter.cpp @@ -37,6 +37,14 @@ sk_sp SkColorFilter::asFragmentProcessor(GrContext*) const } #endif +bool SkColorFilter::appendStages(SkRasterPipeline* pipeline) const { + return this->onAppendStages(pipeline); +} + +bool SkColorFilter::onAppendStages(SkRasterPipeline*) const { + return false; +} + void SkColorFilter::filterSpan4f(const SkPM4f src[], int count, SkPM4f result[]) const { const int N = 128; SkPMColor tmp[N]; diff --git a/src/core/SkCoreBlitters.h b/src/core/SkCoreBlitters.h index 8fcfde6..46b2b7e 100644 --- a/src/core/SkCoreBlitters.h +++ b/src/core/SkCoreBlitters.h @@ -210,4 +210,8 @@ SkBlitter* SkBlitter_ChooseD565(const SkPixmap& device, const SkPaint& paint, SkShader::Context* shaderContext, SkTBlitterAllocator* allocator); + +// Returns nullptr if no SkRasterPipeline blitter can be constructed for this paint. +SkBlitter* SkCreateRasterPipelineBlitter(const SkPixmap&, const SkPaint&, SkTBlitterAllocator*); + #endif diff --git a/src/core/SkRasterPipeline.cpp b/src/core/SkRasterPipeline.cpp index 8991428..c50383a 100644 --- a/src/core/SkRasterPipeline.cpp +++ b/src/core/SkRasterPipeline.cpp @@ -22,11 +22,23 @@ void SkRasterPipeline::append(SkRasterPipeline::Fn body_fn, const void* body_ctx fTail.push_back({ &JustReturn, const_cast(tail_ctx) }); } -void SkRasterPipeline::run(size_t n) { +void SkRasterPipeline::extend(const SkRasterPipeline& src) { + SkASSERT(src.fBody.count() == src.fTail.count()); + + Fn body_fn = src.fBodyStart, + tail_fn = src.fTailStart; + for (int i = 0; i < src.fBody.count(); i++) { + this->append(body_fn, src.fBody[i].fCtx, + tail_fn, src.fTail[i].fCtx); + body_fn = src.fBody[i].fNext; + tail_fn = src.fTail[i].fNext; + } +} + +void SkRasterPipeline::run(size_t x, size_t n) { // It's fastest to start uninitialized if the compilers all let us. If not, next fastest is 0. Sk4f v; - size_t x = 0; while (n >= 4) { fBodyStart(fBody.begin(), x, v,v,v,v, v,v,v,v); x += 4; diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h index 186ee65..acbabcb 100644 --- a/src/core/SkRasterPipeline.h +++ b/src/core/SkRasterPipeline.h @@ -72,9 +72,10 @@ public: SkRasterPipeline(); - // Run the pipeline constructed with append(), walking x through [0,n), + // Run the pipeline constructed with append(), walking x through [x,x+n), // generally in 4 pixel steps, but sometimes 1 pixel at a time. - void run(size_t n); + void run(size_t x, size_t n); + void run(size_t n) { this->run(0, n); } // Use this append() if your stage is sensitive to the number of pixels you're working with: // - body will always be called for a full 4 pixels @@ -93,6 +94,9 @@ public: this->append(body, ctx, tail, ctx); } + // Append all stages to this pipeline. + void extend(const SkRasterPipeline&); + private: using Stages = SkSTArray<10, Stage, /*MEM_COPY=*/true>; diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp new file mode 100644 index 0000000..46a822e --- /dev/null +++ b/src/core/SkRasterPipelineBlitter.cpp @@ -0,0 +1,326 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "SkBlitter.h" +#include "SkColor.h" +#include "SkColorFilter.h" +#include "SkPM4f.h" +#include "SkPM4fPriv.h" +#include "SkRasterPipeline.h" +#include "SkShader.h" +#include "SkSRGB.h" +#include "SkXfermode.h" + + +class SkRasterPipelineBlitter : public SkBlitter { +public: + static SkBlitter* Create(const SkPixmap&, const SkPaint&, SkTBlitterAllocator*); + + SkRasterPipelineBlitter(SkPixmap dst, + SkRasterPipeline shader, + SkRasterPipeline colorFilter, + SkRasterPipeline xfermode, + SkPM4f paintColor) + : fDst(dst) + , fShader(shader) + , fColorFilter(colorFilter) + , fXfermode(xfermode) + , fPaintColor(paintColor) + {} + + void blitH (int x, int y, int w) override; + void blitAntiH(int x, int y, const SkAlpha[], const int16_t[]) override; + void blitMask (const SkMask&, const SkIRect& clip) override; + + // TODO: The default implementations of the other blits look fine, + // but some of them like blitV could probably benefit from custom + // blits using something like a SkRasterPipeline::runFew() method. + +private: + SkPixmap fDst; + SkRasterPipeline fShader, fColorFilter, fXfermode; + SkPM4f fPaintColor; + + typedef SkBlitter INHERITED; +}; + +SkBlitter* SkCreateRasterPipelineBlitter(const SkPixmap& dst, + const SkPaint& paint, + SkTBlitterAllocator* alloc) { + return SkRasterPipelineBlitter::Create(dst, paint, alloc); +} + + +// The default shader produces a constant color (from the SkPaint). +static void SK_VECTORCALL constant_color(SkRasterPipeline::Stage* st, size_t x, + Sk4f r, Sk4f g, Sk4f b, Sk4f a, + Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { + auto color = st->ctx(); + r = color->r(); + g = color->g(); + b = color->b(); + a = color->a(); + st->next(x, r,g,b,a, dr,dg,db,da); +} + +// The default transfer mode is srcover, s' = s + d*(1-sa). +static void SK_VECTORCALL srcover(SkRasterPipeline::Stage* st, size_t x, + Sk4f r, Sk4f g, Sk4f b, Sk4f a, + Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { + auto A = 1.0f - a; + r += dr*A; + g += dg*A; + b += db*A; + a += da*A; + st->next(x, r,g,b,a, dr,dg,db,da); +} + +static Sk4f lerp(const Sk4f& from, const Sk4f& to, const Sk4f& cov) { + return from + (to-from)*cov; +} + +// s' = d(1-c) + sc, for a constant c. +static void SK_VECTORCALL lerp_constant_float(SkRasterPipeline::Stage* st, size_t x, + Sk4f r, Sk4f g, Sk4f b, Sk4f a, + Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { + Sk4f c = *st->ctx(); + + r = lerp(dr, r, c); + g = lerp(dg, g, c); + b = lerp(db, b, c); + a = lerp(da, a, c); + st->next(x, r,g,b,a, dr,dg,db,da); +} + +// s' = d(1-c) + sc, 4 pixels at a time for 8-bit coverage. +static void SK_VECTORCALL lerp_a8(SkRasterPipeline::Stage* st, size_t x, + Sk4f r, Sk4f g, Sk4f b, Sk4f a, + Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { + auto ptr = st->ctx() + x; + Sk4f c = SkNx_cast(Sk4b::Load(ptr)) * (1/255.0f); + + r = lerp(dr, r, c); + g = lerp(dg, g, c); + b = lerp(db, b, c); + a = lerp(da, a, c); + st->next(x, r,g,b,a, dr,dg,db,da); +} + +// Tail variant of lerp_a8() handling 1 pixel at a time. +static void SK_VECTORCALL lerp_a8_1(SkRasterPipeline::Stage* st, size_t x, + Sk4f r, Sk4f g, Sk4f b, Sk4f a, + Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { + auto ptr = st->ctx() + x; + Sk4f c = *ptr * (1/255.0f); + + r = lerp(dr, r, c); + g = lerp(dg, g, c); + b = lerp(db, b, c); + a = lerp(da, a, c); + st->next(x, r,g,b,a, dr,dg,db,da); +} + +static void upscale_lcd16(const Sk4h& lcd16, Sk4f* r, Sk4f* g, Sk4f* b) { + Sk4i _32_bit = SkNx_cast(lcd16); + + *r = SkNx_cast(_32_bit & SK_R16_MASK_IN_PLACE) * (1.0f / SK_R16_MASK_IN_PLACE); + *g = SkNx_cast(_32_bit & SK_G16_MASK_IN_PLACE) * (1.0f / SK_G16_MASK_IN_PLACE); + *b = SkNx_cast(_32_bit & SK_B16_MASK_IN_PLACE) * (1.0f / SK_B16_MASK_IN_PLACE); +} + +// s' = d(1-c) + sc, 4 pixels at a time for 565 coverage. +static void SK_VECTORCALL lerp_lcd16(SkRasterPipeline::Stage* st, size_t x, + Sk4f r, Sk4f g, Sk4f b, Sk4f a, + Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { + auto ptr = st->ctx() + x; + Sk4f cr, cg, cb; + upscale_lcd16(Sk4h::Load(ptr), &cr, &cg, &cb); + + r = lerp(dr, r, cr); + g = lerp(dg, g, cg); + b = lerp(db, b, cb); + a = 1.0f; + st->next(x, r,g,b,a, dr,dg,db,da); +} + +// Tail variant of lerp_lcd16() handling 1 pixel at a time. +static void SK_VECTORCALL lerp_lcd16_1(SkRasterPipeline::Stage* st, size_t x, + Sk4f r, Sk4f g, Sk4f b, Sk4f a, + Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { + auto ptr = st->ctx() + x; + Sk4f cr, cg, cb; + upscale_lcd16({*ptr,0,0,0}, &cr, &cg, &cb); + + r = lerp(dr, r, cr); + g = lerp(dg, g, cg); + b = lerp(db, b, cb); + a = 1.0f; + st->next(x, r,g,b,a, dr,dg,db,da); +} + +// Load 4 8-bit sRGB pixels from SkPMColor order to RGBA. +static void SK_VECTORCALL load_d_srgb(SkRasterPipeline::Stage* st, size_t x, + Sk4f r, Sk4f g, Sk4f b, Sk4f a, + Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { + auto ptr = st->ctx() + x; + + dr = { sk_linear_from_srgb[(ptr[0] >> SK_R32_SHIFT) & 0xff], + sk_linear_from_srgb[(ptr[1] >> SK_R32_SHIFT) & 0xff], + sk_linear_from_srgb[(ptr[2] >> SK_R32_SHIFT) & 0xff], + sk_linear_from_srgb[(ptr[3] >> SK_R32_SHIFT) & 0xff] }; + + dg = { sk_linear_from_srgb[(ptr[0] >> SK_G32_SHIFT) & 0xff], + sk_linear_from_srgb[(ptr[1] >> SK_G32_SHIFT) & 0xff], + sk_linear_from_srgb[(ptr[2] >> SK_G32_SHIFT) & 0xff], + sk_linear_from_srgb[(ptr[3] >> SK_G32_SHIFT) & 0xff] }; + + db = { sk_linear_from_srgb[(ptr[0] >> SK_B32_SHIFT) & 0xff], + sk_linear_from_srgb[(ptr[1] >> SK_B32_SHIFT) & 0xff], + sk_linear_from_srgb[(ptr[2] >> SK_B32_SHIFT) & 0xff], + sk_linear_from_srgb[(ptr[3] >> SK_B32_SHIFT) & 0xff] }; + + // TODO: this >> doesn't really need mask if we make it logical instead of arithmetic. + da = SkNx_cast((Sk4i::Load(ptr) >> SK_A32_SHIFT) & 0xff) * (1/255.0f); + + st->next(x, r,g,b,a, dr,dg,db,da); +} + +// Tail variant of load_d_srgb() handling 1 pixel at a time. +static void SK_VECTORCALL load_d_srgb_1(SkRasterPipeline::Stage* st, size_t x, + Sk4f r, Sk4f g, Sk4f b, Sk4f a, + Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { + auto ptr = st->ctx() + x; + + dr = { sk_linear_from_srgb[(*ptr >> SK_R32_SHIFT) & 0xff], 0,0,0 }; + dg = { sk_linear_from_srgb[(*ptr >> SK_G32_SHIFT) & 0xff], 0,0,0 }; + db = { sk_linear_from_srgb[(*ptr >> SK_B32_SHIFT) & 0xff], 0,0,0 }; + da = { (1/255.0f) * (*ptr >> SK_A32_SHIFT) , 0,0,0 }; + + st->next(x, r,g,b,a, dr,dg,db,da); +} + +// Write out 4 pixels as 8-bit SkPMColor-order sRGB. +static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x, + Sk4f r, Sk4f g, Sk4f b, Sk4f a, + Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { + auto dst = st->ctx() + x; + ( sk_linear_to_srgb(r) << SK_R32_SHIFT + | sk_linear_to_srgb(g) << SK_G32_SHIFT + | sk_linear_to_srgb(b) << SK_B32_SHIFT + | Sk4f_round(255.0f*a) << SK_A32_SHIFT).store(dst); +} + +// Tail variant of store_srgb() handling 1 pixel at a time. +static void SK_VECTORCALL store_srgb_1(SkRasterPipeline::Stage* st, size_t x, + Sk4f r, Sk4f g, Sk4f b, Sk4f a, + Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { + auto dst = st->ctx() + x; + *dst = Sk4f_toS32(swizzle_rb_if_bgra({ r[0], g[0], b[0], a[0] })); +} + + +template +static bool append_effect_stages(const Effect* effect, SkRasterPipeline* pipeline) { + return !effect || effect->appendStages(pipeline); +} + + +SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst, + const SkPaint& paint, + SkTBlitterAllocator* alloc) { + if (!dst.info().gammaCloseToSRGB()) { + return nullptr; // TODO: f16, etc. + } + if (paint.getShader()) { + return nullptr; // TODO: need to work out how shaders and their contexts work + } + + SkRasterPipeline shader, colorFilter, xfermode; + if (!append_effect_stages(paint.getColorFilter(), &colorFilter) || + !append_effect_stages(paint.getXfermode(), &xfermode )) { + return nullptr; + } + + auto blitter = alloc->createT( + dst, + shader, colorFilter, xfermode, + SkColor4f::FromColor(paint.getColor()).premul()); + + if (!paint.getShader()) { + blitter->fShader.append(constant_color, &blitter->fPaintColor); + } + if (!paint.getXfermode()) { + blitter->fXfermode.append(srcover); + } + + return blitter; +} + +void SkRasterPipelineBlitter::blitH(int x, int y, int w) { + auto dst = fDst.writable_addr(0,y); + + SkRasterPipeline p; + p.extend(fShader); + p.extend(fColorFilter); + p.append(load_d_srgb, load_d_srgb_1, dst); + p.extend(fXfermode); + p.append(store_srgb, store_srgb_1, dst); + + p.run(x, w); +} + +void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const int16_t runs[]) { + auto dst = fDst.writable_addr(0,y); + float coverage; + + SkRasterPipeline p; + p.extend(fShader); + p.extend(fColorFilter); + p.append(load_d_srgb, load_d_srgb_1, dst); + p.extend(fXfermode); + p.append(lerp_constant_float, &coverage); + p.append(store_srgb, store_srgb_1, dst); + + for (int16_t run = *runs; run > 0; run = *runs) { + coverage = *aa * (1/255.0f); + p.run(x, run); + + x += run; + runs += run; + aa += run; + } +} + +void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip) { + if (mask.fFormat == SkMask::kBW_Format) { + // TODO: native BW masks? + return INHERITED::blitMask(mask, clip); + } + + int x = clip.left(); + for (int y = clip.top(); y < clip.bottom(); y++) { + auto dst = fDst.writable_addr(0,y); + + SkRasterPipeline p; + p.extend(fShader); + p.extend(fColorFilter); + p.append(load_d_srgb, load_d_srgb_1, dst); + p.extend(fXfermode); + switch (mask.fFormat) { + case SkMask::kA8_Format: + p.append(lerp_a8, lerp_a8_1, mask.getAddr8(x,y)-x); + break; + case SkMask::kLCD16_Format: + p.append(lerp_lcd16, lerp_lcd16_1, mask.getAddrLCD16(x,y)-x); + break; + default: break; + } + p.append(store_srgb, store_srgb_1, dst); + + p.run(x, clip.width()); + } +} diff --git a/src/core/SkXfermode.cpp b/src/core/SkXfermode.cpp index e91f53a..9246568 100644 --- a/src/core/SkXfermode.cpp +++ b/src/core/SkXfermode.cpp @@ -1420,6 +1420,14 @@ bool SkXfermode::IsOpaque(const SkXfermode* xfer, SrcColorOpacity opacityType) { return xfer->isOpaque(opacityType); } +bool SkXfermode::appendStages(SkRasterPipeline* pipeline) const { + return this->onAppendStages(pipeline); +} + +bool SkXfermode::onAppendStages(SkRasterPipeline*) const { + return false; +} + SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_START(SkXfermode) SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkProcCoeffXfermode) SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_END