From: mtklein Date: Wed, 20 Jul 2016 19:10:11 +0000 (-0700) Subject: Tune linear->sRGB constants to round-trip all bytes. X-Git-Tag: accepted/tizen/5.0/unified/20181102.025319~116^2~711 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=566ea9b9fc6746ffad390a4029e56d985eb2aec8;p=platform%2Fupstream%2FlibSkiaSharp.git Tune linear->sRGB constants to round-trip all bytes. I basically just ran a big 5-deep for-loop over the five constants here. This is the first set of coefficients I found that round trips all bytes. I suspect there are many such sets. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2162063003 CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Review-Url: https://codereview.chromium.org/2162063003 --- diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp index b3b8798..0243940 100644 --- a/bench/SkRasterPipelineBench.cpp +++ b/bench/SkRasterPipelineBench.cpp @@ -139,24 +139,15 @@ static void SK_VECTORCALL srcover(SkRasterPipeline::Stage* st, size_t x, st->next(x, r,g,b,a, dr,dg,db,da); } -static Sk4f clamp(const Sk4f& x) { - return Sk4f::Min(Sk4f::Max(x, 0.0f), 255.0f); -} - static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x, Sk4f r, Sk4f g, Sk4f b, Sk4f a, Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { auto ptr = st->ctx() + x; - r = clamp(sk_linear_to_srgb(r)); - g = clamp(sk_linear_to_srgb(g)); - b = clamp(sk_linear_to_srgb(b)); - a = clamp( 255.0f * a ); - - ( SkNx_cast(r) - | SkNx_cast(g) << 8 - | SkNx_cast(b) << 16 - | SkNx_cast(a) << 24 ).store(ptr); + ( sk_linear_to_srgb(r) + | sk_linear_to_srgb(g) << 8 + | sk_linear_to_srgb(b) << 16 + | Sk4f_round(255.0f*a) << 24).store(ptr); } static void SK_VECTORCALL store_srgb_tail(SkRasterPipeline::Stage* st, size_t x, @@ -164,9 +155,8 @@ static void SK_VECTORCALL store_srgb_tail(SkRasterPipeline::Stage* st, size_t x, Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { auto ptr = st->ctx() + x; - auto rgba 
= sk_linear_to_srgb({r[0], g[0], b[0], 0}); - rgba = {rgba[0], rgba[1], rgba[2], 255.0f*a[0]}; - rgba = clamp(rgba); + Sk4i rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0}); + rgba = {rgba[0], rgba[1], rgba[2], (int)(255.0f * a[0] + 0.5f)}; SkNx_cast(rgba).store(ptr); } diff --git a/src/core/SkSRGB.h b/src/core/SkSRGB.h index d567a96..d3baa74 100644 --- a/src/core/SkSRGB.h +++ b/src/core/SkSRGB.h @@ -14,37 +14,33 @@ * * Current best practices: * - for sRGB -> linear, lookup R,G,B in sk_linear_from_srgb; - * - for linear -> sRGB, call sk_linear_to_srgb() for R,G,B, and round; + * - for linear -> sRGB, call sk_linear_to_srgb() for R,G,B; * - the alpha channel is linear in both formats, needing at most *(1/255.0f) or *255.0f. * - * sk_linear_to_srgb()'s output requires rounding; it does not round for you. - * - * Given inputs in [0,1], sk_linear_to_srgb() will not underflow 0 but may overflow 255. - * The overflow is small enough to be handled by rounding. - * (But if you don't trust the inputs are in [0,1], you'd better clamp both sides immediately.) - * * sk_linear_to_srgb() will run a little faster than usual when compiled with SSE4.1+. */ extern const float sk_linear_from_srgb[256]; -static inline Sk4f sk_linear_to_srgb(const Sk4f& x) { +static inline Sk4i sk_linear_to_srgb(const Sk4f& x) { // Approximation of the sRGB gamma curve (within 1 when scaled to 8-bit pixels). - // For 0.00000f <= x < 0.00349f, 12.92 * x - // For 0.00349f <= x <= 1.00000f, 0.679*(x.^0.5) + 0.423*x.^(0.25) - 0.101 - // Note that 0.00349 was selected because it is a point where both functions produce the - // same pixel value when rounded. + // + // Tuned by brute force to minimize the number of bytes that fail to round trip, + // here 0 (of 256), and then to minimize the number of points halfway between bytes + // (in linear space) that fail to hit the right byte, here 131 (of 255), and to + // minimize the number of monotonicity regressions over the range [0,1], here 0. 
+ auto rsqrt = x.rsqrt(), sqrt = rsqrt.invert(), ftrt = rsqrt.rsqrt(); - auto lo = (12.92f * 255.0f) * x; + auto lo = (13.0471f * 255.0f) * x; - auto hi = (-0.101115084998961f * 255.0f) + - (+0.678513029959381f * 255.0f) * sqrt + - (+0.422602055039580f * 255.0f) * ftrt; + auto hi = (-0.0974983f * 255.0f) + + (+0.687999f * 255.0f) * sqrt + + (+0.412999f * 255.0f) * ftrt; - return (x < 0.00349f).thenElse(lo, hi); + return SkNx_cast<int>( (x < 0.0048f).thenElse(lo, hi) ); } #endif//SkSRGB_DEFINED diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h index 3bb11f5..af683e1 100644 --- a/src/opts/SkColorXform_opts.h +++ b/src/opts/SkColorXform_opts.h @@ -16,20 +16,20 @@ namespace SK_OPTS_NS { -static Sk4f linear_to_2dot2(const Sk4f& x) { +static Sk4f clamp_0_1(const Sk4f& x) { + // The order of the arguments is important here. We want to make sure that NaN + // clamps to zero. Note that max(NaN, 0) = 0, while max(0, NaN) = NaN. + return Sk4f::Min(Sk4f::Max(x, 0.0f), 1.0f); +} + +static Sk4i linear_to_2dot2(const Sk4f& x) { // x^(29/64) is a very good approximation of the true value, x^(1/2.2). auto x2 = x.rsqrt(), // x^(-1/2) x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32) x64 = x32.rsqrt(); // x^(+1/64) // 29 = 32 - 2 - 1 - return 255.0f * x2.invert() * x32 * x64.invert(); -} - -static Sk4f clamp_0_to_255(const Sk4f& x) { - // The order of the arguments is important here. We want to make sure that NaN - // clamps to zero. Note that max(NaN, 0) = 0, while max(0, NaN) = NaN. - return Sk4f::Min(Sk4f::Max(x, 0.0f), 255.0f); + return Sk4f_round(255.0f * x2.invert() * x32 * x64.invert()); } enum DstGamma { @@ -79,21 +79,18 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len, auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dst, &dstTables] { if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { - Sk4f (*linear_to_curve)(const Sk4f&) = + Sk4i (*linear_to_curve)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ?
sk_linear_to_srgb : linear_to_2dot2; - dstReds = linear_to_curve(dstReds); - dstGreens = linear_to_curve(dstGreens); - dstBlues = linear_to_curve(dstBlues); + auto reds = linear_to_curve(clamp_0_1(dstReds)); + auto greens = linear_to_curve(clamp_0_1(dstGreens)); + auto blues = linear_to_curve(clamp_0_1(dstBlues)); - dstReds = clamp_0_to_255(dstReds); - dstGreens = clamp_0_to_255(dstGreens); - dstBlues = clamp_0_to_255(dstBlues); - auto rgba = (Sk4f_round(dstReds) << SK_R32_SHIFT) - | (Sk4f_round(dstGreens) << SK_G32_SHIFT) - | (Sk4f_round(dstBlues) << SK_B32_SHIFT) - | (Sk4i{ 0xFF << SK_A32_SHIFT}); + auto rgba = (reds << SK_R32_SHIFT) + | (greens << SK_G32_SHIFT) + | (blues << SK_B32_SHIFT) + | (Sk4i{0xFF} << SK_A32_SHIFT); rgba.store((uint32_t*) dst); dst = SkTAddOffset(dst, 4 * sizeof(uint32_t)); @@ -155,15 +152,13 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len, auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b; if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { - Sk4f (*linear_to_curve)(const Sk4f&) = + Sk4i (*linear_to_curve)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_to_2dot2; - dstPixel = linear_to_curve(dstPixel); - - dstPixel = clamp_0_to_255(dstPixel); + auto pixel = linear_to_curve(clamp_0_1(dstPixel)); uint32_t rgba; - SkNx_cast(Sk4f_round(dstPixel)).store(&rgba); + SkNx_cast(pixel).store(&rgba); rgba |= 0xFF000000; *((uint32_t*) dst) = SkSwizzle_RGBA_to_PMColor(rgba); dst = SkTAddOffset(dst, sizeof(uint32_t)); diff --git a/tests/SRGBTest.cpp b/tests/SRGBTest.cpp index 65bfc59..43ec027 100644 --- a/tests/SRGBTest.cpp +++ b/tests/SRGBTest.cpp @@ -11,28 +11,24 @@ #include static uint8_t linear_to_srgb(float l) { - // Round float to int, truncate that to uint8_t. - return (uint8_t)Sk4f_round( sk_linear_to_srgb(Sk4f{l}) )[0]; + return (uint8_t)sk_linear_to_srgb(Sk4f{l})[0]; } DEF_TEST(sk_linear_to_srgb, r) { - // Should map 0 -> 0 and 1 -> 1. 
- REPORTER_ASSERT(r, 0 == linear_to_srgb(0.0f)); - REPORTER_ASSERT(r, 255 == linear_to_srgb(1.0f)); + // All bytes should round trip. + for (int i = 0; i < 256; i++) { + int actual = linear_to_srgb(sk_linear_from_srgb[i]); + if (i != actual) { + ERRORF(r, "%d -> %d\n", i, actual); + } + } // Should be monotonic between 0 and 1. - // We don't bother checking denorm values. - int tolerated_regressions = 0; -#if defined(SK_ARM_HAS_NEON) - // Values around 0.166016 are usually 72 but drop briefly (41 floats) down to 71. - tolerated_regressions = 1; -#endif uint8_t prev = 0; - for (float f = FLT_MIN; f <= 1.0f; ) { + for (float f = FLT_MIN; f <= 1.0f; ) { // We don't bother checking denorm values. uint8_t srgb = linear_to_srgb(f); - REPORTER_ASSERT(r, srgb >= prev || tolerated_regressions > 0); - if (srgb < prev) { tolerated_regressions--; } + REPORTER_ASSERT(r, srgb >= prev); prev = srgb; union { float flt; uint32_t bits; } pun = { f };