Tune linear->sRGB constants to round-trip all bytes.

author mtklein <mtklein@chromium.org>

Wed, 20 Jul 2016 19:10:11 +0000 (12:10 -0700)

committer Commit bot <commit-bot@chromium.org>

Wed, 20 Jul 2016 19:10:11 +0000 (12:10 -0700)
author mtklein <mtklein@chromium.org>
Wed, 20 Jul 2016 19:10:11 +0000 (12:10 -0700)
committer Commit bot <commit-bot@chromium.org>
Wed, 20 Jul 2016 19:10:11 +0000 (12:10 -0700)
diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp

index b3b8798..0243940 100644 (file)
--- a/bench/SkRasterPipelineBench.cpp
+++ b/bench/SkRasterPipelineBench.cpp
@@ -139,24 +139,15 @@ static void SK_VECTORCALL srcover(SkRasterPipeline::Stage* st, size_t x,
      st->next(x, r,g,b,a, dr,dg,db,da);
  }
  
-static Sk4f clamp(const Sk4f& x) {
-    return Sk4f::Min(Sk4f::Max(x, 0.0f), 255.0f);
-}
-
  static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x,
                                       Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
                                       Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
      auto ptr = st->ctx<uint32_t*>() + x;
  
-    r = clamp(sk_linear_to_srgb(r));
-    g = clamp(sk_linear_to_srgb(g));
-    b = clamp(sk_linear_to_srgb(b));
-    a = clamp(         255.0f * a );
-
-    ( SkNx_cast<int>(r)
-    | SkNx_cast<int>(g) << 8
-    | SkNx_cast<int>(b) << 16
-    | SkNx_cast<int>(a) << 24 ).store(ptr);
+    ( sk_linear_to_srgb(r)
+    | sk_linear_to_srgb(g) << 8
+    | sk_linear_to_srgb(b) << 16
+    | Sk4f_round(255.0f*a) << 24).store(ptr);
  }
  
  static void SK_VECTORCALL store_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
@@ -164,9 +155,8 @@ static void SK_VECTORCALL store_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
                                            Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
      auto ptr = st->ctx<uint32_t*>() + x;
  
-    auto rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0});
-    rgba = {rgba[0], rgba[1], rgba[2], 255.0f*a[0]};
-    rgba = clamp(rgba);
+    Sk4i rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0});
+    rgba = {rgba[0], rgba[1], rgba[2], (int)(255.0f * a[0] + 0.5f)};
  
      SkNx_cast<uint8_t>(rgba).store(ptr);
  }
diff --git a/src/core/SkSRGB.h b/src/core/SkSRGB.h

index d567a96..d3baa74 100644 (file)
--- a/src/core/SkSRGB.h
+++ b/src/core/SkSRGB.h
@@ -14,37 +14,33 @@
   *
   *  Current best practices:
   *      - for sRGB -> linear, lookup R,G,B in sk_linear_from_srgb;
- *      - for linear -> sRGB, call sk_linear_to_srgb() for R,G,B, and round;
+ *      - for linear -> sRGB, call sk_linear_to_srgb() for R,G,B;
   *      - the alpha channel is linear in both formats, needing at most *(1/255.0f) or *255.0f.
   *
- *  sk_linear_to_srgb()'s output requires rounding; it does not round for you.
- *
- *  Given inputs in [0,1], sk_linear_to_srgb() will not underflow 0 but may overflow 255.
- *  The overflow is small enough to be handled by rounding.
- *  (But if you don't trust the inputs are in [0,1], you'd better clamp both sides immediately.)
- *
   *  sk_linear_to_srgb() will run a little faster than usual when compiled with SSE4.1+.
   */
  
  extern const float sk_linear_from_srgb[256];
  
-static inline Sk4f sk_linear_to_srgb(const Sk4f& x) {
+static inline Sk4i sk_linear_to_srgb(const Sk4f& x) {
      // Approximation of the sRGB gamma curve (within 1 when scaled to 8-bit pixels).
-    // For 0.00000f <= x <  0.00349f,    12.92 * x
-    // For 0.00349f <= x <= 1.00000f,    0.679*(x.^0.5) + 0.423*x.^(0.25) - 0.101
-    // Note that 0.00349 was selected because it is a point where both functions produce the
-    // same pixel value when rounded.
+    //
+    // Tuned by brute force to minimize the number of bytes that fail to round trip,
+    // here 0 (of 256), and then to minimize the number of points halfway between bytes
+    // (in linear space) that fail to hit the right byte, here 131 (of 255), and to
+    // minimize the number of monotonicity regressions over the range [0,1], here 0.
+
      auto rsqrt = x.rsqrt(),
           sqrt  = rsqrt.invert(),
           ftrt  = rsqrt.rsqrt();
  
-    auto lo = (12.92f * 255.0f) * x;
+    auto lo = (13.0471f * 255.0f) * x;
  
-    auto hi = (-0.101115084998961f * 255.0f) +
-              (+0.678513029959381f * 255.0f) * sqrt +
-              (+0.422602055039580f * 255.0f) * ftrt;
+    auto hi = (-0.0974983f * 255.0f)
+            + (+0.687999f  * 255.0f) * sqrt
+            + (+0.412999f  * 255.0f) * ftrt;
  
-    return (x < 0.00349f).thenElse(lo, hi);
+    return SkNx_cast<int>( (x < 0.0048f).thenElse(lo, hi) );
  }
  
  #endif//SkSRGB_DEFINED
diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h

index 3bb11f5..af683e1 100644 (file)
--- a/src/opts/SkColorXform_opts.h
+++ b/src/opts/SkColorXform_opts.h
@@ -16,20 +16,20 @@
  
  namespace SK_OPTS_NS {
  
-static Sk4f linear_to_2dot2(const Sk4f& x) {
+static Sk4f clamp_0_1(const Sk4f& x) {
+    // The order of the arguments is important here.  We want to make sure that NaN
+    // clamps to zero.  Note that max(NaN, 0) = 0, while max(0, NaN) = NaN.
+    return Sk4f::Min(Sk4f::Max(x, 0.0f), 1.0f);
+}
+
+static Sk4i linear_to_2dot2(const Sk4f& x) {
      // x^(29/64) is a very good approximation of the true value, x^(1/2.2).
      auto x2  = x.rsqrt(),                            // x^(-1/2)
           x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(),   // x^(-1/32)
           x64 = x32.rsqrt();                          // x^(+1/64)
  
      // 29 = 32 - 2 - 1
-    return 255.0f * x2.invert() * x32 * x64.invert();
-}
-
-static Sk4f clamp_0_to_255(const Sk4f& x) {
-    // The order of the arguments is important here.  We want to make sure that NaN
-    // clamps to zero.  Note that max(NaN, 0) = 0, while max(0, NaN) = NaN.
-    return Sk4f::Min(Sk4f::Max(x, 0.0f), 255.0f);
+    return Sk4f_round(255.0f * x2.invert() * x32 * x64.invert());
  }
  
  enum DstGamma {
@@ -79,21 +79,18 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len,
  
          auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dst, &dstTables] {
              if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) {
-                Sk4f (*linear_to_curve)(const Sk4f&) =
+                Sk4i (*linear_to_curve)(const Sk4f&) =
                          (kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_to_2dot2;
  
-                dstReds   = linear_to_curve(dstReds);
-                dstGreens = linear_to_curve(dstGreens);
-                dstBlues  = linear_to_curve(dstBlues);
+                auto reds   = linear_to_curve(clamp_0_1(dstReds));
+                auto greens = linear_to_curve(clamp_0_1(dstGreens));
+                auto blues  = linear_to_curve(clamp_0_1(dstBlues));
  
-                dstReds   = clamp_0_to_255(dstReds);
-                dstGreens = clamp_0_to_255(dstGreens);
-                dstBlues  = clamp_0_to_255(dstBlues);
  
-                auto rgba = (Sk4f_round(dstReds)   << SK_R32_SHIFT)
-                          | (Sk4f_round(dstGreens) << SK_G32_SHIFT)
-                          | (Sk4f_round(dstBlues)  << SK_B32_SHIFT)
-                          | (Sk4i{      0xFF       << SK_A32_SHIFT});
+                auto rgba = (reds       << SK_R32_SHIFT)
+                          | (greens     << SK_G32_SHIFT)
+                          | (blues      << SK_B32_SHIFT)
+                          | (Sk4i{0xFF} << SK_A32_SHIFT);
                  rgba.store((uint32_t*) dst);
  
                  dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t));
@@ -155,15 +152,13 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len,
          auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b;
  
          if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) {
-            Sk4f (*linear_to_curve)(const Sk4f&) =
+            Sk4i (*linear_to_curve)(const Sk4f&) =
                      (kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_to_2dot2;
  
-            dstPixel = linear_to_curve(dstPixel);
-
-            dstPixel = clamp_0_to_255(dstPixel);
+            auto pixel = linear_to_curve(clamp_0_1(dstPixel));
  
              uint32_t rgba;
-            SkNx_cast<uint8_t>(Sk4f_round(dstPixel)).store(&rgba);
+            SkNx_cast<uint8_t>(pixel).store(&rgba);
              rgba |= 0xFF000000;
              *((uint32_t*) dst) = SkSwizzle_RGBA_to_PMColor(rgba);
              dst = SkTAddOffset<void>(dst, sizeof(uint32_t));
diff --git a/tests/SRGBTest.cpp b/tests/SRGBTest.cpp

index 65bfc59..43ec027 100644 (file)
--- a/tests/SRGBTest.cpp
+++ b/tests/SRGBTest.cpp
@@ -11,28 +11,24 @@
  #include <math.h>
  
  static uint8_t linear_to_srgb(float l) {
-    // Round float to int, truncate that to uint8_t.
-    return (uint8_t)Sk4f_round( sk_linear_to_srgb(Sk4f{l}) )[0];
+    return (uint8_t)sk_linear_to_srgb(Sk4f{l})[0];
  }
  
  DEF_TEST(sk_linear_to_srgb, r) {
-    // Should map 0 -> 0 and 1 -> 1.
-    REPORTER_ASSERT(r,   0 == linear_to_srgb(0.0f));
-    REPORTER_ASSERT(r, 255 == linear_to_srgb(1.0f));
+    // All bytes should round trip.
+    for (int i = 0; i < 256; i++) {
+        int actual = linear_to_srgb(sk_linear_from_srgb[i]);
+        if (i != actual) {
+            ERRORF(r, "%d -> %d\n", i, actual);
+        }
+    }
  
      // Should be monotonic between 0 and 1.
-    // We don't bother checking denorm values.
-    int tolerated_regressions = 0;
-#if defined(SK_ARM_HAS_NEON)
-    // Values around 0.166016 are usually 72 but drop briefly (41 floats) down to 71.
-    tolerated_regressions = 1;
-#endif
      uint8_t prev = 0;
-    for (float f = FLT_MIN; f <= 1.0f; ) {
+    for (float f = FLT_MIN; f <= 1.0f; ) {  // We don't bother checking denorm values.
          uint8_t srgb = linear_to_srgb(f);
  
-        REPORTER_ASSERT(r, srgb >= prev || tolerated_regressions > 0);
-        if (srgb < prev) { tolerated_regressions--; }
+        REPORTER_ASSERT(r, srgb >= prev);
          prev = srgb;
  
          union { float flt; uint32_t bits; } pun = { f };
author	mtklein <mtklein@chromium.org>
	Wed, 20 Jul 2016 19:10:11 +0000 (12:10 -0700)
committer	Commit bot <commit-bot@chromium.org>
	Wed, 20 Jul 2016 19:10:11 +0000 (12:10 -0700)
bench/SkRasterPipelineBench.cpp		patch | blob | history
src/core/SkSRGB.h		patch | blob | history
src/opts/SkColorXform_opts.h		patch | blob | history
tests/SRGBTest.cpp		patch | blob | history