Reland "Add RasterPipeline implementation for SkColorSpaceXform"
authorMatt Sarett <msarett@google.com>
Thu, 1 Dec 2016 19:46:12 +0000 (14:46 -0500)
committerSkia Commit-Bot <skia-commit-bot@chromium.org>
Thu, 1 Dec 2016 20:59:36 +0000 (20:59 +0000)
This is initially turned on for Linux debug builds,
which allows us to start testing.

Chrome for Android is a really good candidate for
this (will appreciate the code size savings), but
I'd first like to run some tests to understand the
performance/size tradeoffs a little better.

BUG:660416

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD

Change-Id: Ifc80e663767df6bb767abb8b12b1ec5cec644ec5
Reviewed-on: https://skia-review.googlesource.com/5452
Reviewed-by: Matt Sarett <msarett@google.com>
Commit-Queue: Matt Sarett <msarett@google.com>

src/core/SkColorSpaceXform.cpp
src/core/SkColorSpaceXform_Base.h
src/core/SkRasterPipeline.h
src/opts/SkRasterPipeline_opts.h

index 450a643..3939853 100644 (file)
 #include "SkColorSpaceXformPriv.h"
 #include "SkHalf.h"
 #include "SkOpts.h"
+#include "SkRasterPipeline.h"
 #include "SkSRGB.h"
 
+#if defined(SK_DEBUG) && defined(SK_BUILD_FOR_UNIX)
+static constexpr bool kUseRasterPipeline = true;   // SK_DEBUG + SK_BUILD_FOR_UNIX: pipeline xform.
+#else
+static constexpr bool kUseRasterPipeline = false;  // Every other build keeps the existing xforms.
+#endif
+
 static constexpr float sk_linear_from_2dot2[256] = {
         0.000000000000000000f, 0.000005077051900662f, 0.000023328004666099f, 0.000056921765712193f,
         0.000107187362341244f, 0.000175123977503027f, 0.000261543754548491f, 0.000367136269815943f,
@@ -350,6 +357,27 @@ std::unique_ptr<SkColorSpaceXform> SkColorSpaceXform::New(SkColorSpace* srcSpace
         }
     }
 
+    if (kUseRasterPipeline) {
+        SrcGamma srcGamma = srcSpaceXYZ->gammaIsLinear() ? kLinear_SrcGamma : kTable_SrcGamma;
+        DstGamma dstGamma;
+        switch (dstSpaceXYZ->gammaNamed()) {
+            case kSRGB_SkGammaNamed:
+                dstGamma = kSRGB_DstGamma;
+                break;
+            case k2Dot2Curve_SkGammaNamed:
+                dstGamma = k2Dot2_DstGamma;
+                break;
+            case kLinear_SkGammaNamed:
+                dstGamma = kLinear_DstGamma;
+                break;
+            default:
+                dstGamma = kTable_DstGamma;
+                break;
+        }
+        return std::unique_ptr<SkColorSpaceXform>(new SkColorSpaceXform_Pipeline(
+                srcSpaceXYZ, srcToDst, dstSpaceXYZ, csm, srcGamma, dstGamma));
+    }
+
     switch (csm) {
         case kNone_ColorSpaceMatch:
             switch (dstSpaceXYZ->gammaNamed()) {
@@ -1264,8 +1292,172 @@ bool SkColorSpaceXform::apply(ColorFormat dstColorFormat, void* dst, ColorFormat
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
+SkColorSpaceXform_Pipeline::SkColorSpaceXform_Pipeline(SkColorSpace_XYZ* srcSpace,
+                                                       const SkMatrix44& srcToDst,
+                                                       SkColorSpace_XYZ* dstSpace,
+                                                       ColorSpaceMatch csm,
+                                                       SrcGamma srcGamma,
+                                                       DstGamma dstGamma)
+    : fCSM(csm)
+    , fSrcGamma(srcGamma)
+    , fDstGamma(dstGamma)
+{
+    fSrcToDst[ 0] = srcToDst.get(0, 0);
+    fSrcToDst[ 1] = srcToDst.get(1, 0);
+    fSrcToDst[ 2] = srcToDst.get(2, 0);
+    fSrcToDst[ 3] = srcToDst.get(0, 1);
+    fSrcToDst[ 4] = srcToDst.get(1, 1);
+    fSrcToDst[ 5] = srcToDst.get(2, 1);
+    fSrcToDst[ 6] = srcToDst.get(0, 2);
+    fSrcToDst[ 7] = srcToDst.get(1, 2);
+    fSrcToDst[ 8] = srcToDst.get(2, 2);
+    fSrcToDst[ 9] = srcToDst.get(0, 3);
+    fSrcToDst[10] = srcToDst.get(1, 3);
+    fSrcToDst[11] = srcToDst.get(2, 3);
+
+    const int numSrcTables = num_tables(srcSpace);
+    const size_t srcEntries = numSrcTables * 256;
+    const bool srcGammasAreMatching = (1 >= numSrcTables);
+    fSrcStorage.reset(srcEntries);
+    build_gamma_tables(fSrcGammaTables, fSrcStorage.get(), 256, srcSpace, kToLinear,
+                       srcGammasAreMatching);
+
+    const int numDstTables = num_tables(dstSpace);
+    dstSpace->toDstGammaTables(fDstGammaTables, &fDstStorage, numDstTables);
+}
+
+bool SkColorSpaceXform_Pipeline::onApply(ColorFormat dstColorFormat, void* dst,
+                                         ColorFormat srcColorFormat, const void* src, int len,
+                                         SkAlphaType alphaType) const {
+    if (kFull_ColorSpaceMatch == fCSM) {
+        if (kPremul_SkAlphaType != alphaType) {
+            if ((kRGBA_8888_ColorFormat == dstColorFormat &&
+                 kRGBA_8888_ColorFormat == srcColorFormat) ||
+                (kBGRA_8888_ColorFormat == dstColorFormat &&
+                 kBGRA_8888_ColorFormat == srcColorFormat))
+            {
+                memcpy(dst, src, len * sizeof(uint32_t));
+                return true;
+            }
+
+            if ((kRGBA_8888_ColorFormat == dstColorFormat &&
+                 kBGRA_8888_ColorFormat == srcColorFormat) ||
+                (kBGRA_8888_ColorFormat == dstColorFormat &&
+                 kRGBA_8888_ColorFormat == srcColorFormat))
+            {
+                SkOpts::RGBA_to_BGRA((uint32_t*) dst, src, len);
+                return true;
+            }
+        }
+    }
+
+    if (kRGBA_F16_ColorFormat == srcColorFormat || kRGBA_F32_ColorFormat == srcColorFormat) {
+        return false;
+    }
+
+    SkRasterPipeline pipeline;
+
+    LoadTablesContext loadTables;
+    if (kLinear_SrcGamma == fSrcGamma) {
+        pipeline.append(SkRasterPipeline::load_8888, &src);
+        if (kBGRA_8888_ColorFormat == srcColorFormat) {
+            pipeline.append(SkRasterPipeline::swap_rb);
+        }
+    } else {
+        loadTables.fSrc = (const uint32_t*) src;
+        loadTables.fG = fSrcGammaTables[1];
+        if (kRGBA_8888_ColorFormat == srcColorFormat) {
+            loadTables.fR = fSrcGammaTables[0];
+            loadTables.fB = fSrcGammaTables[2];
+            pipeline.append(SkRasterPipeline::load_tables, &loadTables);
+        } else {
+            loadTables.fR = fSrcGammaTables[2];
+            loadTables.fB = fSrcGammaTables[0];
+            pipeline.append(SkRasterPipeline::load_tables, &loadTables);
+            pipeline.append(SkRasterPipeline::swap_rb);
+        }
+    }
+
+    if (kNone_ColorSpaceMatch == fCSM) {
+        pipeline.append(SkRasterPipeline::matrix_3x4, fSrcToDst);
+    }
+
+    if (kRGBA_8888_ColorFormat == dstColorFormat || kBGRA_8888_ColorFormat == dstColorFormat) {
+        pipeline.append(SkRasterPipeline::clamp_0);
+        pipeline.append(SkRasterPipeline::clamp_1);
+    }
+
+    if (kPremul_SkAlphaType == alphaType) {
+        pipeline.append(SkRasterPipeline::premul);
+    }
+
+    StoreTablesContext storeTables;
+    switch (fDstGamma) {
+        case kSRGB_DstGamma:
+            pipeline.append(SkRasterPipeline::to_srgb);
+            break;
+        case k2Dot2_DstGamma:
+            pipeline.append(SkRasterPipeline::to_2dot2);
+            break;
+        default:
+            break;
+    }
+
+    switch (dstColorFormat) {
+        case kRGBA_8888_ColorFormat:
+            if (kTable_DstGamma == fDstGamma) {
+                storeTables.fDst = (uint32_t*) dst;
+                storeTables.fR = fDstGammaTables[0];
+                storeTables.fG = fDstGammaTables[1];
+                storeTables.fB = fDstGammaTables[2];
+                storeTables.fCount = SkColorSpaceXform_Base::kDstGammaTableSize;
+                pipeline.append(SkRasterPipeline::store_tables, &storeTables);
+            } else {
+                pipeline.append(SkRasterPipeline::store_8888, &dst);
+            }
+            break;
+        case kBGRA_8888_ColorFormat:
+            if (kTable_DstGamma == fDstGamma) {
+                storeTables.fDst = (uint32_t*) dst;
+                storeTables.fR = fDstGammaTables[2];
+                storeTables.fG = fDstGammaTables[1];
+                storeTables.fB = fDstGammaTables[0];
+                storeTables.fCount = SkColorSpaceXform_Base::kDstGammaTableSize;
+                pipeline.append(SkRasterPipeline::swap_rb);
+                pipeline.append(SkRasterPipeline::store_tables, &storeTables);
+            } else {
+                pipeline.append(SkRasterPipeline::swap_rb);
+                pipeline.append(SkRasterPipeline::store_8888, &dst);
+            }
+            break;
+        case kRGBA_F16_ColorFormat:
+            if (kLinear_DstGamma != fDstGamma) {
+                return false;
+            }
+            pipeline.append(SkRasterPipeline::store_f16, &dst);
+            break;
+        case kRGBA_F32_ColorFormat:
+            if (kLinear_DstGamma != fDstGamma) {
+                return false;
+            }
+            pipeline.append(SkRasterPipeline::store_f32, &dst);
+            break;
+    }
+
+    pipeline.run(0, 0, len);
+    return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
 std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space) {
+    if (kUseRasterPipeline) {  // Same compile-time switch that SkColorSpaceXform::New checks.
+        return std::unique_ptr<SkColorSpaceXform>(new SkColorSpaceXform_Pipeline(
+                space, SkMatrix::I(), space, kNone_ColorSpaceMatch, kTable_SrcGamma,
+                kTable_DstGamma));
+    } else {  // Otherwise the table-to-table SkColorSpaceXform_XYZ instantiation is returned.
         return std::unique_ptr<SkColorSpaceXform>(new SkColorSpaceXform_XYZ
                 <kTable_SrcGamma, kTable_DstGamma, kNone_ColorSpaceMatch>
                 (space, SkMatrix::I(), space));
+    }
 }
index a648677..3d17f67 100644 (file)
@@ -71,6 +71,47 @@ private:
     friend std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space);
 };
 
+struct LoadTablesContext {
+    const uint32_t* fSrc;  // Packed 32-bit source pixels; load_tables reads from fSrc + x.
+    const float*    fR;    // Table indexed by bits 0-7 of each pixel.
+    const float*    fG;    // Table indexed by bits 8-15 of each pixel.
+    const float*    fB;    // Table indexed by bits 16-23 of each pixel.
+};
+
+struct StoreTablesContext {
+    uint32_t*      fDst;    // Packed 32-bit destination pixels; store_tables writes at fDst + x.
+    const uint8_t* fR;      // Table whose entry becomes bits 0-7 of each pixel.
+    const uint8_t* fG;      // Table whose entry becomes bits 8-15 of each pixel.
+    const uint8_t* fB;      // Table whose entry becomes bits 16-23 of each pixel.
+    int            fCount;  // store_tables scales channels by (fCount - 1) to get table indices.
+};
+
+class SkColorSpaceXform_Pipeline : public SkColorSpaceXform_Base {
+protected:
+    virtual bool onApply(ColorFormat dstFormat, void* dst, ColorFormat srcFormat, const void* src,
+                         int count, SkAlphaType alphaType) const;
+
+private:
+    SkColorSpaceXform_Pipeline(SkColorSpace_XYZ* srcSpace, const SkMatrix44& srcToDst,
+                               SkColorSpace_XYZ* dstSpace, ColorSpaceMatch csm, SrcGamma srcGamma,
+                               DstGamma dstGamma);
+
+    // Contain pointers into storage or pointers into precomputed tables.
+    const float*              fSrcGammaTables[3];
+    SkAutoTMalloc<float>      fSrcStorage;
+    const uint8_t*            fDstGammaTables[3];
+    sk_sp<SkData>             fDstStorage;
+
+    float                     fSrcToDst[12];
+
+    ColorSpaceMatch           fCSM;
+    SrcGamma                  fSrcGamma;
+    DstGamma                  fDstGamma;
+
+    friend class SkColorSpaceXform;
+    friend std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space);
+};
+
 // For testing.  Bypasses opts for when src and dst color spaces are equal.
 std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space);
 
index 7338c4d..8762386 100644 (file)
     M(unpremul) M(premul)                                        \
     M(set_rgb)                                                   \
     M(from_srgb) M(from_srgb_d) M(to_srgb)                       \
+    M(to_2dot2)                                                  \
     M(constant_color) M(store_f32)                               \
     M(load_565)  M(load_565_d)  M(store_565)                     \
     M(load_f16)  M(load_f16_d)  M(store_f16)                     \
     M(load_8888) M(load_8888_d) M(store_8888)                    \
+    M(load_tables) M(store_tables)                               \
     M(scale_u8) M(scale_1_float)                                 \
     M(lerp_u8) M(lerp_565) M(lerp_1_float)                       \
     M(dstatop) M(dstin) M(dstout) M(dstover)                     \
index efeca8b..d7f05a9 100644 (file)
@@ -185,13 +185,21 @@ SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
         return tail ? _mm256_maskload_epi32((const int*)src, mask(tail))
                     : SkNu::Load(src);
     }
+    // Loads floats from |src|.  A non-zero |tail| takes the masked load, using mask(tail).
+    SI SkNf load(size_t tail, const float* src) {
+        if (tail) {
+            return _mm256_maskload_ps(src, mask(tail));
+        }
+        return SkNf::Load(src);
+    }
     SI SkNi gather(size_t tail, const  int32_t* src, const SkNi& offset) {
-        return _mm256_mask_i32gather_epi32(SkNi(0).fVec,
-                                           (const int*)src, offset.fVec, mask(tail), 4);
+        auto m = mask(tail);  // Lanes whose mask bit is clear take their value from SkNi(0).
+        return _mm256_mask_i32gather_epi32(SkNi(0).fVec, (const int*)src, offset.fVec, m, 4);
     }
     SI SkNu gather(size_t tail, const uint32_t* src, const SkNi& offset) {
-        return _mm256_mask_i32gather_epi32(SkNi(0).fVec,
-                                           (const int*)src, offset.fVec, mask(tail), 4);
+        auto m = mask(tail);  // Lanes whose mask bit is clear take their value from SkNi(0).
+        return _mm256_mask_i32gather_epi32(SkNi(0).fVec, (const int*)src, offset.fVec, m, 4);
+    }
+    SI SkNf gather(size_t tail, const float* src, const SkNi& offset) {
+        auto m = _mm256_castsi256_ps(mask(tail));  // The float gather takes its mask as __m256.
+        return _mm256_mask_i32gather_ps(SkNf(0).fVec, (const float*)src, offset.fVec, m, 4);
     }
 
     static const char* bug = "I don't think MSAN understands maskstore.";
@@ -210,6 +218,13 @@ SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
         }
         v.store(dst);
     }
+    // Stores |v| to |dst|.  A non-zero |tail| takes the masked store and then marks the first
+    // |tail| floats of |dst| as initialized for MSAN.
+    SI void store(size_t tail, const SkNf& v, float* dst) {
+        if (!tail) {
+            v.store(dst);
+            return;
+        }
+        _mm256_maskstore_ps(dst, mask(tail), v.fVec);
+        sk_msan_mark_initialized(dst, dst+tail, bug);
+    }
 #endif
 
 SI void from_8888(const SkNu& _8888, SkNf* r, SkNf* g, SkNf* b, SkNf* a) {
@@ -326,6 +341,22 @@ STAGE(to_srgb) {
     b = sk_linear_to_srgb_needs_round(b);
 }
 
+STAGE(to_2dot2) {  // Encodes r, g and b with an approximate 1/2.2 power curve; a is untouched.
+    auto to_2dot2 = [](const SkNf& x) {
+        // x^(29/64) is a very good approximation of the true value, x^(1/2.2).
+        auto x2  = x.rsqrt(),                            // x^(-1/2)
+             x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(),   // x^(-1/32)
+             x64 = x32.rsqrt();                          // x^(+1/64)
+
+        // 29 = 32 - 2 - 1, so the product below is x^(32/64) * x^(-2/64) * x^(-1/64).
+        return x2.invert() * x32 * x64.invert();
+    };
+
+    r = to_2dot2(r);  // NOTE(review): confirm how x == 0 behaves through the rsqrt()/invert() chain.
+    g = to_2dot2(g);
+    b = to_2dot2(b);
+}
+
 // The default shader produces a constant color (from the SkPaint).
 STAGE(constant_color) {
     auto color = (const SkPM4f*)ctx;
@@ -516,6 +547,33 @@ STAGE(store_8888) {
                 | SkNx_cast<int>(255.0f * a + 0.5f) << 24 ), (int*)ptr);
 }
 
+STAGE(load_tables) {  // Loads packed pixels and maps r, g, b through the context's tables.
+    auto loadCtx = (const LoadTablesContext*)ctx;
+    auto ptr = loadCtx->fSrc + x;
+
+    SkNu rgba = load(tail, ptr);
+    auto to_int = [](const SkNu& v) { return SkNi::Load(&v); };  // Reloads the same bits as SkNi.
+    r = gather(tail, loadCtx->fR, to_int((rgba >>  0) & 0xff));
+    g = gather(tail, loadCtx->fG, to_int((rgba >>  8) & 0xff));
+    b = gather(tail, loadCtx->fB, to_int((rgba >> 16) & 0xff));
+    a = (1/255.0f) * SkNx_cast<float>(to_int(rgba >> 24));  // Alpha skips the tables: byte / 255.
+}
+
+STAGE(store_tables) {  // Maps r, g, b through the context's tables; alpha is packed directly.
+    auto storeCtx = (const StoreTablesContext*)ctx;
+    auto ptr = storeCtx->fDst + x;
+
+    float scale = storeCtx->fCount - 1;  // Multiplier that turns a channel value into an index.
+    SkNi ri = SkNx_cast<int>(scale * r + 0.5f);  // 0.5 is added before the cast to int.
+    SkNi gi = SkNx_cast<int>(scale * g + 0.5f);
+    SkNi bi = SkNx_cast<int>(scale * b + 0.5f);
+
+    store(tail, ( SkNx_cast<int>(gather(tail, storeCtx->fR, ri)) << 0
+                | SkNx_cast<int>(gather(tail, storeCtx->fG, gi)) << 8
+                | SkNx_cast<int>(gather(tail, storeCtx->fB, bi)) << 16
+                | SkNx_cast<int>(255.0f * a + 0.5f)              << 24), (int*)ptr);
+}
+
 SI SkNf inv(const SkNf& x) { return 1.0f - x; }  // Returns 1 - x.
 
 RGBA_XFERMODE(clear)    { return 0.0f; }