sk_linear_from_srgb_math
author Mike Klein <mtklein@chromium.org>
Tue, 4 Oct 2016 13:29:32 +0000 (09:29 -0400)
committer Skia Commit-Bot <skia-commit-bot@chromium.org>
Tue, 4 Oct 2016 14:40:58 +0000 (14:40 +0000)
Looks great (imperceptibly different) but ~10% slower on both ARMv8 and x86-64.  Probably need to hide the table-or-math logic behind Sk4f/Sk8f unless we find faster math.

I do like the new look of the pipeline stages though.  A lot clearer.

BUG=skia:

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2880

CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Change-Id: I44952237d56ba167445b07d4830eb8959c4d47b7
Reviewed-on: https://skia-review.googlesource.com/2880
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Matt Sarett <msarett@google.com>
src/core/SkSRGB.h
src/opts/SkRasterPipeline_opts.h

index 31fd4ae..e60e288 100644 (file)
@@ -62,4 +62,23 @@ static inline Sk4i sk_linear_to_srgb_noclamp(const Sk4f& x) {
     return SkNx_cast<int>(f);
 }
 
+// sRGB -> linear for per-lane integer values in [0,255], using a piecewise polynomial instead of table lookups, scaling better to larger SIMD vectors.
+static inline Sk4f sk_linear_from_srgb_math(const Sk4i& s) {
+    auto x = SkNx_cast<float>(s);
+
+    const float u = 1/255.0f;  // x is in [0,255] (not [0,1]), so each x^n term is pre-scaled by u^n.
+
+    // Non-linear segment of sRGB curve approximated, with t = x/255 in [0,1], by
+    // l = 0.0025 + 0.6975t^2 + 0.3t^3; the u^2 and u^3 factors are folded into k2 and k3 below.
+    const float k0 = 0.0025f,
+                k2 = 0.6975f * u*u,
+                k3 = 0.3000f * u*u*u;
+    auto hi = k0 + (k2 + k3*x) * (x*x);  // == k0 + k2*x^2 + k3*x^3, with x^2 factored out.
+
+    // Linear segment of sRGB curve: the normal slope (1/12.92 on the [0,1] scale), extended a little further than normal.
+    auto lo = x * (u/12.92f);
+
+    return (x < 14.025f).thenElse(lo, hi);  // 14.025 == 0.055 * 255; presumably picks lo per lane where x is below it, hi elsewhere.
+}
+
 #endif//SkSRGB_DEFINED
index fe0fde8..a23940e 100644 (file)
@@ -244,79 +244,21 @@ namespace SK_OPTS_NS {
     KERNEL_Sk4f(load_d_srgb) {
         auto ptr = (const uint32_t*)ctx + x;
 
-        if (tail) {
-            float rs[] = {0,0,0,0},
-                  gs[] = {0,0,0,0},
-                  bs[] = {0,0,0,0},
-                  as[] = {0,0,0,0};
-            for (size_t i = 0; i < tail; i++) {
-                rs[i] = sk_linear_from_srgb[(ptr[i] >> SK_R32_SHIFT) & 0xff];
-                gs[i] = sk_linear_from_srgb[(ptr[i] >> SK_G32_SHIFT) & 0xff];
-                bs[i] = sk_linear_from_srgb[(ptr[i] >> SK_B32_SHIFT) & 0xff];
-                as[i] =       (1/255.0f) *  (ptr[i] >> SK_A32_SHIFT)        ;
-            }
-            dr = Sk4f::Load(rs);
-            dg = Sk4f::Load(gs);
-            db = Sk4f::Load(bs);
-            da = Sk4f::Load(as);
-            return;
-        }
-
-        dr = { sk_linear_from_srgb[(ptr[0] >> SK_R32_SHIFT) & 0xff],
-               sk_linear_from_srgb[(ptr[1] >> SK_R32_SHIFT) & 0xff],
-               sk_linear_from_srgb[(ptr[2] >> SK_R32_SHIFT) & 0xff],
-               sk_linear_from_srgb[(ptr[3] >> SK_R32_SHIFT) & 0xff] };
-
-        dg = { sk_linear_from_srgb[(ptr[0] >> SK_G32_SHIFT) & 0xff],
-               sk_linear_from_srgb[(ptr[1] >> SK_G32_SHIFT) & 0xff],
-               sk_linear_from_srgb[(ptr[2] >> SK_G32_SHIFT) & 0xff],
-               sk_linear_from_srgb[(ptr[3] >> SK_G32_SHIFT) & 0xff] };
-
-        db = { sk_linear_from_srgb[(ptr[0] >> SK_B32_SHIFT) & 0xff],
-               sk_linear_from_srgb[(ptr[1] >> SK_B32_SHIFT) & 0xff],
-               sk_linear_from_srgb[(ptr[2] >> SK_B32_SHIFT) & 0xff],
-               sk_linear_from_srgb[(ptr[3] >> SK_B32_SHIFT) & 0xff] };
-
-        da = SkNx_cast<float>(Sk4u::Load(ptr) >> SK_A32_SHIFT) * (1/255.0f);
+        auto px = load_tail(tail, (const int*)ptr);
+        dr =    sk_linear_from_srgb_math((px >> SK_R32_SHIFT) & 0xff);
+        dg =    sk_linear_from_srgb_math((px >> SK_G32_SHIFT) & 0xff);
+        db =    sk_linear_from_srgb_math((px >> SK_B32_SHIFT) & 0xff);
+        da = (1/255.0f)*SkNx_cast<float>((px >> SK_A32_SHIFT) & 0xff);
     }
 
     KERNEL_Sk4f(load_s_srgb) {
         auto ptr = (const uint32_t*)ctx + x;
 
-        if (tail) {
-            float rs[] = {0,0,0,0},
-                  gs[] = {0,0,0,0},
-                  bs[] = {0,0,0,0},
-                  as[] = {0,0,0,0};
-            for (size_t i = 0; i < tail; i++) {
-                rs[i] = sk_linear_from_srgb[(ptr[i] >> SK_R32_SHIFT) & 0xff];
-                gs[i] = sk_linear_from_srgb[(ptr[i] >> SK_G32_SHIFT) & 0xff];
-                bs[i] = sk_linear_from_srgb[(ptr[i] >> SK_B32_SHIFT) & 0xff];
-                as[i] =       (1/255.0f) *  (ptr[i] >> SK_A32_SHIFT)        ;
-            }
-            r = Sk4f::Load(rs);
-            g = Sk4f::Load(gs);
-            b = Sk4f::Load(bs);
-            a = Sk4f::Load(as);
-            return;
-        }
-
-        r = { sk_linear_from_srgb[(ptr[0] >> SK_R32_SHIFT) & 0xff],
-              sk_linear_from_srgb[(ptr[1] >> SK_R32_SHIFT) & 0xff],
-              sk_linear_from_srgb[(ptr[2] >> SK_R32_SHIFT) & 0xff],
-              sk_linear_from_srgb[(ptr[3] >> SK_R32_SHIFT) & 0xff] };
-
-        g = { sk_linear_from_srgb[(ptr[0] >> SK_G32_SHIFT) & 0xff],
-              sk_linear_from_srgb[(ptr[1] >> SK_G32_SHIFT) & 0xff],
-              sk_linear_from_srgb[(ptr[2] >> SK_G32_SHIFT) & 0xff],
-              sk_linear_from_srgb[(ptr[3] >> SK_G32_SHIFT) & 0xff] };
-
-        b = { sk_linear_from_srgb[(ptr[0] >> SK_B32_SHIFT) & 0xff],
-              sk_linear_from_srgb[(ptr[1] >> SK_B32_SHIFT) & 0xff],
-              sk_linear_from_srgb[(ptr[2] >> SK_B32_SHIFT) & 0xff],
-              sk_linear_from_srgb[(ptr[3] >> SK_B32_SHIFT) & 0xff] };
-
-        a = SkNx_cast<float>(Sk4u::Load(ptr) >> SK_A32_SHIFT) * (1/255.0f);
+        auto px = load_tail(tail, (const int*)ptr);
+        r =    sk_linear_from_srgb_math((px >> SK_R32_SHIFT) & 0xff);
+        g =    sk_linear_from_srgb_math((px >> SK_G32_SHIFT) & 0xff);
+        b =    sk_linear_from_srgb_math((px >> SK_B32_SHIFT) & 0xff);
+        a = (1/255.0f)*SkNx_cast<float>((px >> SK_A32_SHIFT) & 0xff);
     }
 
     KERNEL_Sk4f(store_srgb) {