Use new jpeg_crop_scanlines() API to optimize jpeg subset decodes
authormsarett <msarett@google.com>
Mon, 22 Feb 2016 20:27:46 +0000 (12:27 -0800)
committerCommit bot <commit-bot@chromium.org>
Mon, 22 Feb 2016 20:27:46 +0000 (12:27 -0800)
This was adapted from:
https://codereview.chromium.org/1530933003

Subset Decode Runtime (Original / Optimized) on Nexus 6P
TopLeft     0.51x
TopRight    0.56x
Middle      0.71x
BottomLeft  0.79x
BottomRight 0.79x

BUG=skia:4256
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1719073002

Review URL: https://codereview.chromium.org/1719073002

DEPS
dm/DM.cpp
dm/DMSrcSink.cpp
dm/DMSrcSink.h
gyp/codec.gyp
src/codec/SkCodec.cpp
src/codec/SkJpegCodec.cpp
src/codec/SkJpegCodec.h

diff --git a/DEPS b/DEPS
index 58fc188..219b347 100644 (file)
--- a/DEPS
+++ b/DEPS
@@ -24,7 +24,7 @@ deps = {
   "third_party/externals/dng_sdk" : "https://android.googlesource.com/platform/external/dng_sdk.git@6579353b8ee5d8aa1f1a96ae22798de9b41e19b8",
   "third_party/externals/piex"    : "https://android.googlesource.com/platform/external/piex.git@919011b9f6fce1d9d35bf51f3aab7509f66712cc",
 
-  "third_party/externals/libjpeg-turbo"             : "https://skia.googlesource.com/third_party/libjpeg-turbo.git@fa6a3ad4b883f5c3f448cf255ea280bf44e7d8ad",
+  "third_party/externals/libjpeg-turbo"             : "https://skia.googlesource.com/third_party/libjpeg-turbo.git@7bf1a3c9b06bede89cec37cec0b5085c0d6d6c13",
   # libjpeg-turbo depends on yasm to compile .asm files
   "third_party/externals/yasm/source/patched-yasm/" : "https://chromium.googlesource.com/chromium/deps/yasm/patched-yasm.git@4671120cd8558ce62ee8672ebf3eb6f5216f909b",
   "third_party/externals/yasm/binaries"             : "https://chromium.googlesource.com/chromium/deps/yasm/binaries.git@52f9b3f4b0aa06da24ef8b123058bb61ee468881",
index c8b6584..6810dc5 100644 (file)
--- a/dm/DM.cpp
+++ b/dm/DM.cpp
@@ -242,6 +242,9 @@ static void push_codec_src(Path path, CodecSrc::Mode mode, CodecSrc::DstColorTyp
         case CodecSrc::kStripe_Mode:
             folder.append("stripe");
             break;
+        case CodecSrc::kCroppedScanline_Mode:
+            folder.append("crop");
+            break;
         case CodecSrc::kSubset_Mode:
             folder.append("codec_subset");
             break;
@@ -346,31 +349,38 @@ static void push_codec_srcs(Path path) {
     // SkJpegCodec natively supports scaling to: 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875
     const float nativeScales[] = { 0.125f, 0.25f, 0.375f, 0.5f, 0.625f, 0.750f, 0.875f, 1.0f };
 
-    const CodecSrc::Mode nativeModes[] = { CodecSrc::kCodec_Mode, CodecSrc::kCodecZeroInit_Mode,
-            CodecSrc::kScanline_Mode, CodecSrc::kStripe_Mode, CodecSrc::kSubset_Mode,
-            CodecSrc::kGen_Mode };
+    SkTArray<CodecSrc::Mode> nativeModes;
+    nativeModes.push_back(CodecSrc::kCodec_Mode);
+    nativeModes.push_back(CodecSrc::kCodecZeroInit_Mode);
+    nativeModes.push_back(CodecSrc::kGen_Mode);
+    switch (codec->getEncodedFormat()) {
+        case SkEncodedFormat::kJPEG_SkEncodedFormat:
+            nativeModes.push_back(CodecSrc::kScanline_Mode);
+            nativeModes.push_back(CodecSrc::kStripe_Mode);
+            nativeModes.push_back(CodecSrc::kCroppedScanline_Mode);
+            break;
+        case SkEncodedFormat::kWEBP_SkEncodedFormat:
+            nativeModes.push_back(CodecSrc::kSubset_Mode);
+            break;
+        default:
+            nativeModes.push_back(CodecSrc::kScanline_Mode);
+            nativeModes.push_back(CodecSrc::kStripe_Mode);
+            break;
+    }
 
-    CodecSrc::DstColorType colorTypes[3];
-    uint32_t numColorTypes;
+    SkTArray<CodecSrc::DstColorType> colorTypes;
+    colorTypes.push_back(CodecSrc::kGetFromCanvas_DstColorType);
     switch (codec->getInfo().colorType()) {
         case kGray_8_SkColorType:
-            colorTypes[0] = CodecSrc::kGetFromCanvas_DstColorType;
-            colorTypes[1] = CodecSrc::kGrayscale_Always_DstColorType;
+            colorTypes.push_back(CodecSrc::kGrayscale_Always_DstColorType);
             if (kWBMP_SkEncodedFormat == codec->getEncodedFormat()) {
-                colorTypes[2] = CodecSrc::kIndex8_Always_DstColorType;
-                numColorTypes = 3;
-            } else {
-                numColorTypes = 2;
+                colorTypes.push_back(CodecSrc::kIndex8_Always_DstColorType);
             }
             break;
         case kIndex_8_SkColorType:
-            colorTypes[0] = CodecSrc::kGetFromCanvas_DstColorType;
-            colorTypes[1] = CodecSrc::kIndex8_Always_DstColorType;
-            numColorTypes = 2;
+            colorTypes.push_back(CodecSrc::kIndex8_Always_DstColorType);
             break;
         default:
-            colorTypes[0] = CodecSrc::kGetFromCanvas_DstColorType;
-            numColorTypes = 1;
             break;
     }
 
@@ -396,9 +406,16 @@ static void push_codec_srcs(Path path) {
         }
 
         for (float scale : nativeScales) {
-            for (uint32_t i = 0; i < numColorTypes; i++) {
+            for (CodecSrc::DstColorType colorType : colorTypes) {
                 for (SkAlphaType alphaType : alphaModes) {
-                    push_codec_src(path, mode, colorTypes[i], alphaType, scale);
+                    // Only test kCroppedScanline_Mode when the alpha type is opaque.  The test is
+                    // slow and won't be interestingly different with different alpha types.
+                    if (CodecSrc::kCroppedScanline_Mode == mode &&
+                            kOpaque_SkAlphaType != alphaType) {
+                        continue;
+                    }
+
+                    push_codec_src(path, mode, colorType, alphaType, scale);
                 }
             }
         }
@@ -422,12 +439,12 @@ static void push_codec_srcs(Path path) {
     const int sampleSizes[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
 
     for (int sampleSize : sampleSizes) {
-        for (uint32_t i = 0; i < numColorTypes; i++) {
+        for (CodecSrc::DstColorType colorType : colorTypes) {
             for (SkAlphaType alphaType : alphaModes) {
-                push_android_codec_src(path, AndroidCodecSrc::kFullImage_Mode, colorTypes[i],
+                push_android_codec_src(path, AndroidCodecSrc::kFullImage_Mode, colorType,
                         alphaType, sampleSize);
                 if (subset) {
-                    push_android_codec_src(path, AndroidCodecSrc::kDivisor_Mode, colorTypes[i],
+                    push_android_codec_src(path, AndroidCodecSrc::kDivisor_Mode, colorType,
                             alphaType, sampleSize);
                 }
             }
index 7ccaf22..bd881e8 100644 (file)
@@ -522,6 +522,31 @@ Error CodecSrc::draw(SkCanvas* canvas) const {
             canvas->drawBitmap(bitmap, 0, 0);
             break;
         }
+        case kCroppedScanline_Mode: {
+            const int width = decodeInfo.width();
+            const int height = decodeInfo.height();
+            // This value is chosen because, as we move across the image, it will sometimes
+            // align with the jpeg block sizes and it will sometimes not.  This allows us
+            // to test interestingly different code paths in the implementation.
+            const int tileSize = 36;
+
+            SkCodec::Options opts;
+            SkIRect subset;
+            for (int x = 0; x < width; x += tileSize) {
+                subset = SkIRect::MakeXYWH(x, 0, SkTMin(tileSize, width - x), height);
+                opts.fSubset = &subset;
+                if (SkCodec::kSuccess != codec->startScanlineDecode(decodeInfo, &opts,
+                        colorPtr, colorCountPtr)) {
+                    return "Could not start scanline decoder.";
+                }
+
+                codec->getScanlines(bitmap.getAddr(x, 0), height, bitmap.rowBytes());
+            }
+
+            premultiply_if_necessary(bitmap);
+            canvas->drawBitmap(bitmap, 0, 0);
+            break;
+        }
         case kSubset_Mode: {
             // Arbitrarily choose a divisor.
             int divisor = 2;
index a987c7d..3190576 100644 (file)
@@ -110,6 +110,7 @@ public:
         kCodecZeroInit_Mode,
         kScanline_Mode,
         kStripe_Mode, // Tests the skipping of scanlines
+        kCroppedScanline_Mode, // Tests (jpeg) cropped scanline optimization
         kSubset_Mode, // For codecs that support subsets directly.
         kGen_Mode,    // Test SkCodecImageGenerator (includes YUV)
     };
index c53d955..d8f6f66 100644 (file)
             'raw_codec',
           ],
         },],
+        ['skia_android_framework == 0', {
+          'defines': [
+            # TODO (msarett): Add this optimization to Android.
+            # https://buganizer.corp.google.com/u/0/issues/27290496
+            'TURBO_HAS_CROP',
+          ],
+        },],
       ],
     }, {
       # RAW codec needs exceptions. Due to that, it is a separate target. Its usage can be
index c4f7498..d7cfb37 100644 (file)
@@ -354,23 +354,28 @@ void SkCodec::fillIncompleteImage(const SkImageInfo& info, void* dst, size_t row
     const int linesRemaining = linesRequested - linesDecoded;
     SkSampler* sampler = this->getSampler(false);
 
+    int fillWidth = info.width();
+    if (fOptions.fSubset) {
+        fillWidth = fOptions.fSubset->width();
+    }
+
     switch (this->getScanlineOrder()) {
         case kTopDown_SkScanlineOrder:
         case kNone_SkScanlineOrder: {
-            const SkImageInfo fillInfo = info.makeWH(info.width(), linesRemaining);
+            const SkImageInfo fillInfo = info.makeWH(fillWidth, linesRemaining);
             fillDst = SkTAddOffset<void>(dst, linesDecoded * rowBytes);
             fill_proc(fillInfo, fillDst, rowBytes, fillValue, zeroInit, sampler);
             break;
         }
         case kBottomUp_SkScanlineOrder: {
             fillDst = dst;
-            const SkImageInfo fillInfo = info.makeWH(info.width(), linesRemaining);
+            const SkImageInfo fillInfo = info.makeWH(fillWidth, linesRemaining);
             fill_proc(fillInfo, fillDst, rowBytes, fillValue, zeroInit, sampler);
             break;
         }
         case kOutOfOrder_SkScanlineOrder: {
             SkASSERT(1 == linesRequested || this->getInfo().height() == linesRequested);
-            const SkImageInfo fillInfo = info.makeWH(info.width(), 1);
+            const SkImageInfo fillInfo = info.makeWH(fillWidth, 1);
             for (int srcY = linesDecoded; srcY < linesRequested; srcY++) {
                 fillDst = SkTAddOffset<void>(dst, this->outputScanline(srcY) * rowBytes);
                 fill_proc(fillInfo, fillDst, rowBytes, fillValue, zeroInit, sampler);
index e1440af..d360515 100644 (file)
@@ -80,6 +80,7 @@ SkJpegCodec::SkJpegCodec(const SkImageInfo& srcInfo, SkStream* stream,
     : INHERITED(srcInfo, stream)
     , fDecoderMgr(decoderMgr)
     , fReadyState(decoderMgr->dinfo()->global_state)
+    , fSwizzlerSubset(SkIRect::MakeEmpty())
 {}
 
 /*
@@ -371,7 +372,16 @@ void SkJpegCodec::initializeSwizzler(const SkImageInfo& dstInfo, const Options&
         srcConfig = SkSwizzler::kRGB;
     }
 
-    fSwizzler.reset(SkSwizzler::CreateSwizzler(srcConfig, nullptr, dstInfo, options));
+    Options swizzlerOptions = options;
+    if (options.fSubset) {
+        // Use fSwizzlerSubset if this is a subset decode.  This is necessary in the case
+        // where libjpeg-turbo provides a subset and then we need to subset it further.
+        // Also, verify that fSwizzlerSubset is initialized and valid.
+        SkASSERT(!fSwizzlerSubset.isEmpty() && fSwizzlerSubset.x() <= options.fSubset->x() &&
+                fSwizzlerSubset.width() == options.fSubset->width());
+        swizzlerOptions.fSubset = &fSwizzlerSubset;
+    }
+    fSwizzler.reset(SkSwizzler::CreateSwizzler(srcConfig, nullptr, dstInfo, swizzlerOptions));
     SkASSERT(fSwizzler);
     fStorage.reset(get_row_bytes(fDecoderMgr->dinfo()));
     fSrcRow = fStorage.get();
@@ -411,12 +421,60 @@ SkCodec::Result SkJpegCodec::onStartScanlineDecode(const SkImageInfo& dstInfo,
         return kInvalidInput;
     }
 
+    if (options.fSubset) {
+        fSwizzlerSubset = *options.fSubset;
+    }
+
+#ifdef TURBO_HAS_CROP
+    if (options.fSubset) {
+        uint32_t startX = options.fSubset->x();
+        uint32_t width = options.fSubset->width();
+
+        // libjpeg-turbo may need to align startX to a multiple of the IDCT
+        // block size.  If this is the case, it will decrease the value of
+        // startX to the appropriate alignment and also increase the value
+        // of width so that the right edge of the requested subset remains
+        // the same.
+        jpeg_crop_scanline(fDecoderMgr->dinfo(), &startX, &width);
+
+        SkASSERT(startX <= (uint32_t) options.fSubset->x());
+        SkASSERT(width >= (uint32_t) options.fSubset->width());
+        SkASSERT(startX + width >= (uint32_t) options.fSubset->right());
+
+        // Instruct the swizzler (if it is necessary) to further subset the
+        // output provided by libjpeg-turbo.
+        //
+        // We set this here (rather than in the if statement below), so that
+        // if (1) we don't need a swizzler for the subset, and (2) we need a
+        // swizzler for CMYK, the swizzler will still use the proper subset
+        // dimensions.
+        //
+        // Note that the swizzler will ignore the y and height parameters of
+        // the subset.  Since the scanline decoder (and the swizzler) handle
+        // one row at a time, only the subsetting in the x-dimension matters.
+        fSwizzlerSubset.setXYWH(options.fSubset->x() - startX, 0,
+                options.fSubset->width(), options.fSubset->height());
+
+        // We will need a swizzler if libjpeg-turbo cannot provide the exact
+        // subset that we request.
+        if (startX != (uint32_t) options.fSubset->x() ||
+                width != (uint32_t) options.fSubset->width()) {
+            this->initializeSwizzler(dstInfo, options);
+        }
+    }
+
+    // Make sure we have a swizzler if we are converting from CMYK.
+    if (!fSwizzler && JCS_CMYK == fDecoderMgr->dinfo()->out_color_space) {
+        this->initializeSwizzler(dstInfo, options);
+    }
+#else
     // We will need a swizzler if we are performing a subset decode or
     // converting from CMYK.
     J_COLOR_SPACE colorSpace = fDecoderMgr->dinfo()->out_color_space;
     if (options.fSubset || JCS_CMYK == colorSpace || JCS_RGB == colorSpace) {
         this->initializeSwizzler(dstInfo, options);
     }
+#endif
 
     return kSuccess;
 }
index 06685cf..bb5ce75 100644 (file)
@@ -115,6 +115,10 @@ private:
     // scanline decoding
     SkAutoTMalloc<uint8_t>     fStorage;    // Only used if sampling is needed
     uint8_t*                   fSrcRow;     // Only used if sampling is needed
+    // libjpeg-turbo provides some subsetting.  In the case that libjpeg-turbo
+    // cannot take the exact the subset that we need, we will use the swizzler
+    // to further subset the output from libjpeg-turbo.
+    SkIRect                    fSwizzlerSubset;
     SkAutoTDelete<SkSwizzler>  fSwizzler;
     
     typedef SkCodec INHERITED;