From: reed Date: Wed, 15 Apr 2015 20:13:48 +0000 (-0700) Subject: Speed up hairline curves (quads and cubics) X-Git-Tag: submit/tizen/20180928.044319~2770 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a0246165eab9018d07afc09ff826ea4c40898ebc;p=platform%2Fupstream%2FlibSkiaSharp.git Speed up hairline curves (quads and cubics) /skia/trunk> cat ../old.txt maxrss loops min median mean max stddev samples config bench 9M 1 4.28ms 4.32ms 4.36ms 4.67ms 3% ▄▁▁▃▂▂▁▁▂█ 8888 path_hairline_small_AA_cubic 9M 1 743µs 767µs 770µs 825µs 4% ▃▃▇▃▁▁▅▁█▁ 8888 path_hairline_small_AA_conic 9M 1 533µs 606µs 598µs 680µs 9% ▁▂▂█▆▇▇▄▂▂ 8888 path_hairline_small_AA_quad 9M 1 451µs 452µs 456µs 495µs 3% ▁▁▁▁█▁▁▁▁▁ 8888 path_hairline_small_AA_line /skia/trunk> cat ../new.txt maxrss loops min median mean max stddev samples config bench 9M 1 827µs 827µs 831µs 869µs 2% ▁▁▁▁▁▁▁█▁▁ 8888 path_hairline_small_AA_cubic 9M 1 515µs 517µs 517µs 518µs 0% ▇█▆▅▃▃▁▁▁▅ 8888 path_hairline_small_AA_conic 9M 1 310µs 311µs 315µs 332µs 2% ▂▁█▆▁▁▁▁▁▁ 8888 path_hairline_small_AA_quad 9M 1 254µs 254µs 258µs 276µs 3% ▁▁▁▁▁▁▁█▇▂ 8888 path_hairline_small_AA_line Edited revert of https://codereview.chromium.org/1085013003 TBR= Review URL: https://codereview.chromium.org/1078413003 --- diff --git a/src/core/SkBlitter.h b/src/core/SkBlitter.h index 2d4a0defbf..8d9f7bcbed 100644 --- a/src/core/SkBlitter.h +++ b/src/core/SkBlitter.h @@ -8,6 +8,12 @@ #ifndef SkBlitter_DEFINED #define SkBlitter_DEFINED +#ifdef SK_SUPPORT_LEGACY_BLITANTIH2V2 + #define SK_BLITANTIH2V2_VIRTUAL +#else + #define SK_BLITANTIH2V2_VIRTUAL virtual +#endif + #include "SkBitmap.h" #include "SkBitmapProcShader.h" #include "SkMask.h" @@ -54,7 +60,7 @@ public: virtual const SkBitmap* justAnOpaqueColor(uint32_t* value); // (x, y), (x + 1, y) - void blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) { + SK_BLITANTIH2V2_VIRTUAL void blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) { int16_t runs[3]; uint8_t aa[2]; @@ -67,7 +73,7 @@ public: } // (x, y), (x, y + 1) 
- void blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) { + SK_BLITANTIH2V2_VIRTUAL void blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) { int16_t runs[2]; uint8_t aa[1]; diff --git a/src/core/SkBlitter_ARGB32.cpp b/src/core/SkBlitter_ARGB32.cpp index bbad6c7926..caf85ddc1f 100644 --- a/src/core/SkBlitter_ARGB32.cpp +++ b/src/core/SkBlitter_ARGB32.cpp @@ -106,6 +106,25 @@ void SkARGB32_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[], } } +#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2 +void SkARGB32_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) { + uint32_t* device = fDevice.getAddr32(x, y); + SkDEBUGCODE((void)fDevice.getAddr32(x + 1, y);) + + device[0] = SkBlendARGB32(fPMColor, device[0], a0); + device[1] = SkBlendARGB32(fPMColor, device[1], a1); +} + +void SkARGB32_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) { + uint32_t* device = fDevice.getAddr32(x, y); + SkDEBUGCODE((void)fDevice.getAddr32(x, y + 1);) + + device[0] = SkBlendARGB32(fPMColor, device[0], a0); + device = (uint32_t*)((char*)device + fDevice.rowBytes()); + device[0] = SkBlendARGB32(fPMColor, device[0], a1); +} +#endif + ////////////////////////////////////////////////////////////////////////////////////// #define solid_8_pixels(mask, dst, color) \ @@ -180,6 +199,25 @@ void SkARGB32_Opaque_Blitter::blitMask(const SkMask& mask, } } +#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2 +void SkARGB32_Opaque_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) { + uint32_t* device = fDevice.getAddr32(x, y); + SkDEBUGCODE((void)fDevice.getAddr32(x + 1, y);) + + device[0] = SkFastFourByteInterp(fPMColor, device[0], a0); + device[1] = SkFastFourByteInterp(fPMColor, device[1], a1); +} + +void SkARGB32_Opaque_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) { + uint32_t* device = fDevice.getAddr32(x, y); + SkDEBUGCODE((void)fDevice.getAddr32(x, y + 1);) + + device[0] = SkFastFourByteInterp(fPMColor, device[0], a0); + device = (uint32_t*)((char*)device + fDevice.rowBytes()); + device[0] = 
SkFastFourByteInterp(fPMColor, device[0], a1); +} +#endif + /////////////////////////////////////////////////////////////////////////////// void SkARGB32_Blitter::blitV(int x, int y, int height, SkAlpha alpha) { @@ -256,6 +294,25 @@ void SkARGB32_Black_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[], } } +#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2 +void SkARGB32_Black_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) { + uint32_t* device = fDevice.getAddr32(x, y); + SkDEBUGCODE((void)fDevice.getAddr32(x + 1, y);) + + device[0] = (a0 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a0); + device[1] = (a1 << SK_A32_SHIFT) + SkAlphaMulQ(device[1], 256 - a1); +} + +void SkARGB32_Black_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) { + uint32_t* device = fDevice.getAddr32(x, y); + SkDEBUGCODE((void)fDevice.getAddr32(x, y + 1);) + + device[0] = (a0 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a0); + device = (uint32_t*)((char*)device + fDevice.rowBytes()); + device[0] = (a1 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a1); +} +#endif + /////////////////////////////////////////////////////////////////////////////// // Special version of SkBlitRow::Factory32 that knows we're in kSrc_Mode, diff --git a/src/core/SkCoreBlitters.h b/src/core/SkCoreBlitters.h index f4a5d6f4db..acc18febed 100644 --- a/src/core/SkCoreBlitters.h +++ b/src/core/SkCoreBlitters.h @@ -120,6 +120,10 @@ public: virtual void blitRect(int x, int y, int width, int height); virtual void blitMask(const SkMask&, const SkIRect&); virtual const SkBitmap* justAnOpaqueColor(uint32_t*); +#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2 + void blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) override; + void blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) override; +#endif protected: SkColor fColor; @@ -140,6 +144,10 @@ public: SkARGB32_Opaque_Blitter(const SkBitmap& device, const SkPaint& paint) : INHERITED(device, paint) { SkASSERT(paint.getAlpha() == 0xFF); } virtual void blitMask(const SkMask&, const 
SkIRect&); +#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2 + void blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) override; + void blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) override; +#endif private: typedef SkARGB32_Blitter INHERITED; @@ -150,6 +158,10 @@ public: SkARGB32_Black_Blitter(const SkBitmap& device, const SkPaint& paint) : INHERITED(device, paint) {} virtual void blitAntiH(int x, int y, const SkAlpha antialias[], const int16_t runs[]); +#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2 + void blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) override; + void blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) override; +#endif private: typedef SkARGB32_Opaque_Blitter INHERITED; diff --git a/src/core/SkGeometry.cpp b/src/core/SkGeometry.cpp index 50af22de37..5979615425 100644 --- a/src/core/SkGeometry.cpp +++ b/src/core/SkGeometry.cpp @@ -9,6 +9,7 @@ #include "SkMatrix.h" #include "SkNx.h" +#if 0 static Sk2s from_point(const SkPoint& point) { return Sk2s::Load(&point.fX); } @@ -18,6 +19,7 @@ static SkPoint to_point(const Sk2s& x) { x.store(&point.fX); return point; } +#endif static SkVector to_vector(const Sk2s& x) { SkVector vector; @@ -135,6 +137,18 @@ static SkScalar eval_quad_derivative(const SkScalar src[], SkScalar t) { return 2 * SkScalarMulAdd(A, t, B); } +void SkQuadToCoeff(const SkPoint pts[3], SkPoint coeff[3]) { + Sk2s p0 = from_point(pts[0]); + Sk2s p1 = from_point(pts[1]); + Sk2s p2 = from_point(pts[2]); + + Sk2s p1minus2 = p1 - p0; + + coeff[0] = to_point(p2 - p1 - p1 + p0); // A * t^2 + coeff[1] = to_point(p1minus2 + p1minus2); // B * t + coeff[2] = pts[0]; // C +} + void SkEvalQuadAt(const SkPoint src[3], SkScalar t, SkPoint* pt, SkVector* tangent) { SkASSERT(src); SkASSERT(t >= 0 && t <= SK_Scalar1); @@ -452,6 +466,26 @@ void SkChopCubicAt(const SkPoint src[4], SkPoint dst[7], SkScalar t) { dst[6] = src[3]; } +void SkCubicToCoeff(const SkPoint pts[4], SkPoint coeff[4]) { + Sk2s p0 = from_point(pts[0]); + Sk2s p1 = from_point(pts[1]); + Sk2s p2 = from_point(pts[2]); + 
Sk2s p3 = from_point(pts[3]); + + const Sk2s three(3); + Sk2s p1minusp2 = p1 - p2; + + Sk2s D = p0; + Sk2s A = p3 + three * p1minusp2 - D; + Sk2s B = three * (D - p1minusp2 - p1); + Sk2s C = three * (p1 - D); + + coeff[0] = to_point(A); + coeff[1] = to_point(B); + coeff[2] = to_point(C); + coeff[3] = to_point(D); +} + /* http://code.google.com/p/skia/issues/detail?id=32 This test code would fail when we didn't check the return result of diff --git a/src/core/SkGeometry.h b/src/core/SkGeometry.h index bafde61155..9ddd91f750 100644 --- a/src/core/SkGeometry.h +++ b/src/core/SkGeometry.h @@ -9,6 +9,22 @@ #define SkGeometry_DEFINED #include "SkMatrix.h" +#include "SkNx.h" + +static inline Sk2s from_point(const SkPoint& point) { + return Sk2s::Load(&point.fX); +} + +static inline SkPoint to_point(const Sk2s& x) { + SkPoint point; + x.store(&point.fX); + return point; +} + +static inline Sk2s sk2s_cubic_eval(const Sk2s& A, const Sk2s& B, const Sk2s& C, const Sk2s& D, + const Sk2s& t) { + return ((A * t + B) * t + C) * t + D; +} /** Given a quadratic equation Ax^2 + Bx + C = 0, return 0, 1, 2 roots for the equation. 
@@ -25,6 +41,16 @@ SkPoint SkEvalQuadTangentAt(const SkPoint src[3], SkScalar t); */ void SkEvalQuadAt(const SkPoint src[3], SkScalar t, SkPoint* pt, SkVector* tangent = NULL); +/** + * output is : eval(t) == coeff[0] * t^2 + coeff[1] * t + coeff[2] + */ +void SkQuadToCoeff(const SkPoint pts[3], SkPoint coeff[3]); + +/** + * output is : eval(t) == coeff[0] * t^3 + coeff[1] * t^2 + coeff[2] * t + coeff[3] + */ +void SkCubicToCoeff(const SkPoint pts[4], SkPoint coeff[4]); + /** Given a src quadratic bezier, chop it at the specified t value, where 0 < t < 1, and return the two new quadratics in dst: dst[0..2] and dst[2..4] diff --git a/src/core/SkScan_Hairline.cpp b/src/core/SkScan_Hairline.cpp index 0f2308b1f7..2ec051f8ab 100644 --- a/src/core/SkScan_Hairline.cpp +++ b/src/core/SkScan_Hairline.cpp @@ -1,4 +1,3 @@ - /* * Copyright 2006 The Android Open Source Project * @@ -6,7 +5,6 @@ * found in the LICENSE file. */ - #include "SkScan.h" #include "SkBlitter.h" #include "SkRasterClip.h" @@ -192,6 +190,10 @@ void SkScan::HairRect(const SkRect& rect, const SkRasterClip& clip, #include "SkPath.h" #include "SkGeometry.h" +#include "SkNx.h" + +#define kMaxCubicSubdivideLevel 6 +#define kMaxQuadSubdivideLevel 5 static int compute_int_quad_dist(const SkPoint pts[3]) { // compute the vector between the control point ([1]) and the middle of the @@ -214,6 +216,9 @@ static int compute_int_quad_dist(const SkPoint pts[3]) { static void hairquad(const SkPoint pts[3], const SkRegion* clip, SkBlitter* blitter, int level, SkScan::HairRgnProc lineproc) { + SkASSERT(level <= kMaxQuadSubdivideLevel); + +#ifdef SK_SUPPORT_LEGACY_BLITANTIH2V2 if (level > 0) { SkPoint tmp[5]; @@ -224,10 +229,113 @@ static void hairquad(const SkPoint pts[3], const SkRegion* clip, SkPoint tmp[] = { pts[0], pts[2] }; lineproc(tmp, 2, clip, blitter); } +#else + SkPoint coeff[3]; + SkQuadToCoeff(pts, coeff); + + const int lines = 1 << level; + Sk2s t(0); + Sk2s dt(SK_Scalar1 / lines); + + SkPoint tmp[(1 << 
kMaxQuadSubdivideLevel) + 1]; + SkASSERT((unsigned)lines < SK_ARRAY_COUNT(tmp)); + + tmp[0] = pts[0]; + Sk2s A = Sk2s::Load(&coeff[0].fX); + Sk2s B = Sk2s::Load(&coeff[1].fX); + Sk2s C = Sk2s::Load(&coeff[2].fX); + for (int i = 1; i < lines; ++i) { + t += dt; + ((A * t + B) * t + C).store(&tmp[i].fX); + } + tmp[lines] = pts[2]; + lineproc(tmp, lines + 1, clip, blitter); +#endif } -static void haircubic(const SkPoint pts[4], const SkRegion* clip, +#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2 +static inline Sk2s abs(const Sk2s& value) { + return Sk2s::Max(value, -value); +} + +static inline SkScalar max_component(const Sk2s& value) { + SkScalar components[2]; + value.store(components); + return SkTMax(components[0], components[1]); +} + +static inline int compute_cubic_segs(const SkPoint pts[4]) { + Sk2s p0 = from_point(pts[0]); + Sk2s p1 = from_point(pts[1]); + Sk2s p2 = from_point(pts[2]); + Sk2s p3 = from_point(pts[3]); + + const Sk2s oneThird(1.0f / 3.0f); + const Sk2s twoThird(2.0f / 3.0f); + + Sk2s p13 = oneThird * p3 + twoThird * p0; + Sk2s p23 = oneThird * p0 + twoThird * p3; + + SkScalar diff = max_component(Sk2s::Max(abs(p1 - p13), abs(p2 - p23))); + SkScalar tol = SK_Scalar1 / 8; + + for (int i = 0; i < kMaxCubicSubdivideLevel; ++i) { + if (diff < tol) { + return 1 << i; + } + tol *= 4; + } + return 1 << kMaxCubicSubdivideLevel; +} + +static bool lt_90(SkPoint p0, SkPoint pivot, SkPoint p2) { + return SkVector::DotProduct(p0 - pivot, p2 - pivot) >= 0; +} + +// The off-curve points are "inside" the limits of the on-curve pts +static bool quick_cubic_niceness_check(const SkPoint pts[4]) { + return lt_90(pts[1], pts[0], pts[3]) && + lt_90(pts[2], pts[0], pts[3]) && + lt_90(pts[1], pts[3], pts[0]) && + lt_90(pts[2], pts[3], pts[0]); +} + +static void hair_cubic(const SkPoint pts[4], const SkRegion* clip, SkBlitter* blitter, + SkScan::HairRgnProc lineproc) { + const int lines = compute_cubic_segs(pts); + SkASSERT(lines > 0); + if (1 == lines) { + SkPoint tmp[2] = { 
pts[0], pts[3] }; + lineproc(tmp, 2, clip, blitter); + return; + } + + SkPoint coeff[4]; + SkCubicToCoeff(pts, coeff); + + const Sk2s dt(SK_Scalar1 / lines); + Sk2s t(0); + + SkPoint tmp[(1 << kMaxCubicSubdivideLevel) + 1]; + SkASSERT((unsigned)lines < SK_ARRAY_COUNT(tmp)); + + tmp[0] = pts[0]; + Sk2s A = Sk2s::Load(&coeff[0].fX); + Sk2s B = Sk2s::Load(&coeff[1].fX); + Sk2s C = Sk2s::Load(&coeff[2].fX); + Sk2s D = Sk2s::Load(&coeff[3].fX); + for (int i = 1; i < lines; ++i) { + t += dt; + (((A * t + B) * t + C) * t + D).store(&tmp[i].fX); + } + tmp[lines] = pts[3]; + lineproc(tmp, lines + 1, clip, blitter); +} +#endif + +static inline void haircubic(const SkPoint pts[4], const SkRegion* clip, SkBlitter* blitter, int level, SkScan::HairRgnProc lineproc) { +#ifdef SK_SUPPORT_LEGACY_BLITANTIH2V2 if (level > 0) { SkPoint tmp[7]; @@ -238,10 +346,20 @@ static void haircubic(const SkPoint pts[4], const SkRegion* clip, SkPoint tmp[] = { pts[0], pts[3] }; lineproc(tmp, 2, clip, blitter); } -} +#else + if (quick_cubic_niceness_check(pts)) { + hair_cubic(pts, clip, blitter, lineproc); + } else { + SkPoint tmp[13]; + SkScalar tValues[3]; -#define kMaxCubicSubdivideLevel 6 -#define kMaxQuadSubdivideLevel 5 + int count = SkChopCubicAtMaxCurvature(pts, tmp, tValues); + for (int i = 0; i < count; i++) { + hair_cubic(&tmp[i * 3], clip, blitter, lineproc); + } + } +#endif +} static int compute_quad_level(const SkPoint pts[3]) { int d = compute_int_quad_dist(pts); @@ -311,9 +429,9 @@ static void hair_path(const SkPath& path, const SkRasterClip& rclip, SkBlitter* } break; } - case SkPath::kCubic_Verb: + case SkPath::kCubic_Verb: { haircubic(pts, clip, blitter, kMaxCubicSubdivideLevel, lineproc); - break; + } break; case SkPath::kClose_Verb: break; case SkPath::kDone_Verb: