From 91c22b2ea6bd13a31321ead01645467f21858cd0 Mon Sep 17 00:00:00 2001 From: msarett Date: Mon, 22 Feb 2016 12:27:46 -0800 Subject: [PATCH] Use new jpeg_crop_scanlines() API to optimize jpeg subset decodes This was adapted from: https://codereview.chromium.org/1530933003 Subset Decode Runtime (Original / Optimized) on Nexus 6P TopLeft 0.51x TopRight 0.56x Middle 0.71x BottomLeft 0.79x BottomRight 0.79x BUG=skia:4256 GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1719073002 Review URL: https://codereview.chromium.org/1719073002 --- DEPS | 2 +- dm/DM.cpp | 59 +++++++++++++++++++++++++++++----------------- dm/DMSrcSink.cpp | 25 ++++++++++++++++++++ dm/DMSrcSink.h | 1 + gyp/codec.gyp | 7 ++++++ src/codec/SkCodec.cpp | 11 ++++++--- src/codec/SkJpegCodec.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++++++- src/codec/SkJpegCodec.h | 4 ++++ 8 files changed, 143 insertions(+), 26 deletions(-) diff --git a/DEPS b/DEPS index 58fc188..219b347 100644 --- a/DEPS +++ b/DEPS @@ -24,7 +24,7 @@ deps = { "third_party/externals/dng_sdk" : "https://android.googlesource.com/platform/external/dng_sdk.git@6579353b8ee5d8aa1f1a96ae22798de9b41e19b8", "third_party/externals/piex" : "https://android.googlesource.com/platform/external/piex.git@919011b9f6fce1d9d35bf51f3aab7509f66712cc", - "third_party/externals/libjpeg-turbo" : "https://skia.googlesource.com/third_party/libjpeg-turbo.git@fa6a3ad4b883f5c3f448cf255ea280bf44e7d8ad", + "third_party/externals/libjpeg-turbo" : "https://skia.googlesource.com/third_party/libjpeg-turbo.git@7bf1a3c9b06bede89cec37cec0b5085c0d6d6c13", # libjpeg-turbo depends on yasm to compile .asm files "third_party/externals/yasm/source/patched-yasm/" : "https://chromium.googlesource.com/chromium/deps/yasm/patched-yasm.git@4671120cd8558ce62ee8672ebf3eb6f5216f909b", "third_party/externals/yasm/binaries" : "https://chromium.googlesource.com/chromium/deps/yasm/binaries.git@52f9b3f4b0aa06da24ef8b123058bb61ee468881", diff --git a/dm/DM.cpp b/dm/DM.cpp index c8b6584..6810dc5 100644 --- a/dm/DM.cpp +++ b/dm/DM.cpp @@ -242,6 +242,9 @@ static void push_codec_src(Path path, CodecSrc::Mode mode, CodecSrc::DstColorTyp case CodecSrc::kStripe_Mode: folder.append("stripe"); break; + case CodecSrc::kCroppedScanline_Mode: + folder.append("crop"); + break; case CodecSrc::kSubset_Mode: folder.append("codec_subset"); break; @@ -346,31 +349,38 @@ static void push_codec_srcs(Path path) { // SkJpegCodec natively supports scaling to: 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875 const float nativeScales[] = { 0.125f, 0.25f, 0.375f, 0.5f, 0.625f, 0.750f, 0.875f, 1.0f }; - const CodecSrc::Mode nativeModes[] = { CodecSrc::kCodec_Mode, CodecSrc::kCodecZeroInit_Mode, - CodecSrc::kScanline_Mode, CodecSrc::kStripe_Mode, CodecSrc::kSubset_Mode, - CodecSrc::kGen_Mode }; + SkTArray nativeModes; + nativeModes.push_back(CodecSrc::kCodec_Mode); + nativeModes.push_back(CodecSrc::kCodecZeroInit_Mode); + nativeModes.push_back(CodecSrc::kGen_Mode); + switch (codec->getEncodedFormat()) { + case SkEncodedFormat::kJPEG_SkEncodedFormat: + nativeModes.push_back(CodecSrc::kScanline_Mode); + nativeModes.push_back(CodecSrc::kStripe_Mode); + nativeModes.push_back(CodecSrc::kCroppedScanline_Mode); + break; + case SkEncodedFormat::kWEBP_SkEncodedFormat: + nativeModes.push_back(CodecSrc::kSubset_Mode); + break; + default: + nativeModes.push_back(CodecSrc::kScanline_Mode); + nativeModes.push_back(CodecSrc::kStripe_Mode); + break; + } - CodecSrc::DstColorType colorTypes[3]; - uint32_t numColorTypes; + SkTArray colorTypes; + colorTypes.push_back(CodecSrc::kGetFromCanvas_DstColorType); switch (codec->getInfo().colorType()) { case kGray_8_SkColorType: - colorTypes[0] = CodecSrc::kGetFromCanvas_DstColorType; - colorTypes[1] = CodecSrc::kGrayscale_Always_DstColorType; + colorTypes.push_back(CodecSrc::kGrayscale_Always_DstColorType); if (kWBMP_SkEncodedFormat == codec->getEncodedFormat()) { - colorTypes[2] = CodecSrc::kIndex8_Always_DstColorType; - numColorTypes = 3; - } else { - numColorTypes = 2; + colorTypes.push_back(CodecSrc::kIndex8_Always_DstColorType); } break; case kIndex_8_SkColorType: - colorTypes[0] = CodecSrc::kGetFromCanvas_DstColorType; - colorTypes[1] = CodecSrc::kIndex8_Always_DstColorType; - numColorTypes = 2; + colorTypes.push_back(CodecSrc::kIndex8_Always_DstColorType); break; default: - colorTypes[0] = CodecSrc::kGetFromCanvas_DstColorType; - numColorTypes = 1; break; } @@ -396,9 +406,16 @@ static void push_codec_srcs(Path path) { } for (float scale : nativeScales) { - for (uint32_t i = 0; i < numColorTypes; i++) { + for (CodecSrc::DstColorType colorType : colorTypes) { for (SkAlphaType alphaType : alphaModes) { - push_codec_src(path, mode, colorTypes[i], alphaType, scale); + // Only test kCroppedScanline_Mode when the alpha type is opaque. The test is + // slow and won't be interestingly different with different alpha types. + if (CodecSrc::kCroppedScanline_Mode == mode && + kOpaque_SkAlphaType != alphaType) { + continue; + } + + push_codec_src(path, mode, colorType, alphaType, scale); } } } @@ -422,12 +439,12 @@ static void push_codec_srcs(Path path) { const int sampleSizes[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; for (int sampleSize : sampleSizes) { - for (uint32_t i = 0; i < numColorTypes; i++) { + for (CodecSrc::DstColorType colorType : colorTypes) { for (SkAlphaType alphaType : alphaModes) { - push_android_codec_src(path, AndroidCodecSrc::kFullImage_Mode, colorTypes[i], + push_android_codec_src(path, AndroidCodecSrc::kFullImage_Mode, colorType, alphaType, sampleSize); if (subset) { - push_android_codec_src(path, AndroidCodecSrc::kDivisor_Mode, colorTypes[i], + push_android_codec_src(path, AndroidCodecSrc::kDivisor_Mode, colorType, alphaType, sampleSize); } } diff --git a/dm/DMSrcSink.cpp b/dm/DMSrcSink.cpp index 7ccaf22..bd881e8 100644 --- a/dm/DMSrcSink.cpp +++ b/dm/DMSrcSink.cpp @@ -522,6 +522,31 @@ Error CodecSrc::draw(SkCanvas* canvas) const { canvas->drawBitmap(bitmap, 0, 0); break; } + case kCroppedScanline_Mode: { + const int width = decodeInfo.width(); + const int height = decodeInfo.height(); + // This value is chosen because, as we move across the image, it will sometimes + // align with the jpeg block sizes and it will sometimes not. This allows us + // to test interestingly different code paths in the implementation. + const int tileSize = 36; + + SkCodec::Options opts; + SkIRect subset; + for (int x = 0; x < width; x += tileSize) { + subset = SkIRect::MakeXYWH(x, 0, SkTMin(tileSize, width - x), height); + opts.fSubset = ⊂ + if (SkCodec::kSuccess != codec->startScanlineDecode(decodeInfo, &opts, + colorPtr, colorCountPtr)) { + return "Could not start scanline decoder."; + } + + codec->getScanlines(bitmap.getAddr(x, 0), height, bitmap.rowBytes()); + } + + premultiply_if_necessary(bitmap); + canvas->drawBitmap(bitmap, 0, 0); + break; + } case kSubset_Mode: { // Arbitrarily choose a divisor. int divisor = 2; diff --git a/dm/DMSrcSink.h b/dm/DMSrcSink.h index a987c7d..3190576 100644 --- a/dm/DMSrcSink.h +++ b/dm/DMSrcSink.h @@ -110,6 +110,7 @@ public: kCodecZeroInit_Mode, kScanline_Mode, kStripe_Mode, // Tests the skipping of scanlines + kCroppedScanline_Mode, // Tests (jpeg) cropped scanline optimization kSubset_Mode, // For codecs that support subsets directly. kGen_Mode, // Test SkCodecImageGenerator (includes YUV) }; diff --git a/gyp/codec.gyp b/gyp/codec.gyp index c53d955..d8f6f66 100644 --- a/gyp/codec.gyp +++ b/gyp/codec.gyp @@ -84,6 +84,13 @@ 'raw_codec', ], },], + ['skia_android_framework == 0', { + 'defines': [ + # TODO (msarett): Add this optimization to Android. + # https://buganizer.corp.google.com/u/0/issues/27290496 + 'TURBO_HAS_CROP', + ], + },], ], }, { # RAW codec needs exceptions. Due to that, it is a separate target. Its usage can be diff --git a/src/codec/SkCodec.cpp b/src/codec/SkCodec.cpp index c4f7498..d7cfb37 100644 --- a/src/codec/SkCodec.cpp +++ b/src/codec/SkCodec.cpp @@ -354,23 +354,28 @@ void SkCodec::fillIncompleteImage(const SkImageInfo& info, void* dst, size_t row const int linesRemaining = linesRequested - linesDecoded; SkSampler* sampler = this->getSampler(false); + int fillWidth = info.width(); + if (fOptions.fSubset) { + fillWidth = fOptions.fSubset->width(); + } + switch (this->getScanlineOrder()) { case kTopDown_SkScanlineOrder: case kNone_SkScanlineOrder: { - const SkImageInfo fillInfo = info.makeWH(info.width(), linesRemaining); + const SkImageInfo fillInfo = info.makeWH(fillWidth, linesRemaining); fillDst = SkTAddOffset(dst, linesDecoded * rowBytes); fill_proc(fillInfo, fillDst, rowBytes, fillValue, zeroInit, sampler); break; } case kBottomUp_SkScanlineOrder: { fillDst = dst; - const SkImageInfo fillInfo = info.makeWH(info.width(), linesRemaining); + const SkImageInfo fillInfo = info.makeWH(fillWidth, linesRemaining); fill_proc(fillInfo, fillDst, rowBytes, fillValue, zeroInit, sampler); break; } case kOutOfOrder_SkScanlineOrder: { SkASSERT(1 == linesRequested || this->getInfo().height() == linesRequested); - const SkImageInfo fillInfo = info.makeWH(info.width(), 1); + const SkImageInfo fillInfo = info.makeWH(fillWidth, 1); for (int srcY = linesDecoded; srcY < linesRequested; srcY++) { fillDst = SkTAddOffset(dst, this->outputScanline(srcY) * rowBytes); fill_proc(fillInfo, fillDst, rowBytes, fillValue, zeroInit, sampler); diff --git a/src/codec/SkJpegCodec.cpp b/src/codec/SkJpegCodec.cpp index e1440af..d360515 100644 --- a/src/codec/SkJpegCodec.cpp +++ b/src/codec/SkJpegCodec.cpp @@ -80,6 +80,7 @@ SkJpegCodec::SkJpegCodec(const SkImageInfo& srcInfo, SkStream* stream, : INHERITED(srcInfo, stream) , fDecoderMgr(decoderMgr) , fReadyState(decoderMgr->dinfo()->global_state) + , fSwizzlerSubset(SkIRect::MakeEmpty()) {} /* @@ -371,7 +372,16 @@ void SkJpegCodec::initializeSwizzler(const SkImageInfo& dstInfo, const Options& srcConfig = SkSwizzler::kRGB; } - fSwizzler.reset(SkSwizzler::CreateSwizzler(srcConfig, nullptr, dstInfo, options)); + Options swizzlerOptions = options; + if (options.fSubset) { + // Use fSwizzlerSubset if this is a subset decode. This is necessary in the case + // where libjpeg-turbo provides a subset and then we need to subset it further. + // Also, verify that fSwizzlerSubset is initialized and valid. + SkASSERT(!fSwizzlerSubset.isEmpty() && fSwizzlerSubset.x() <= options.fSubset->x() && + fSwizzlerSubset.width() == options.fSubset->width()); + swizzlerOptions.fSubset = &fSwizzlerSubset; + } + fSwizzler.reset(SkSwizzler::CreateSwizzler(srcConfig, nullptr, dstInfo, swizzlerOptions)); SkASSERT(fSwizzler); fStorage.reset(get_row_bytes(fDecoderMgr->dinfo())); fSrcRow = fStorage.get(); @@ -411,12 +421,60 @@ SkCodec::Result SkJpegCodec::onStartScanlineDecode(const SkImageInfo& dstInfo, return kInvalidInput; } + if (options.fSubset) { + fSwizzlerSubset = *options.fSubset; + } + +#ifdef TURBO_HAS_CROP + if (options.fSubset) { + uint32_t startX = options.fSubset->x(); + uint32_t width = options.fSubset->width(); + + // libjpeg-turbo may need to align startX to a multiple of the IDCT + // block size. If this is the case, it will decrease the value of + // startX to the appropriate alignment and also increase the value + // of width so that the right edge of the requested subset remains + // the same. + jpeg_crop_scanline(fDecoderMgr->dinfo(), &startX, &width); + + SkASSERT(startX <= (uint32_t) options.fSubset->x()); + SkASSERT(width >= (uint32_t) options.fSubset->width()); + SkASSERT(startX + width >= (uint32_t) options.fSubset->right()); + + // Instruct the swizzler (if it is necessary) to further subset the + // output provided by libjpeg-turbo. + // + // We set this here (rather than in the if statement below), so that + // if (1) we don't need a swizzler for the subset, and (2) we need a + // swizzler for CMYK, the swizzler will still use the proper subset + // dimensions. + // + // Note that the swizzler will ignore the y and height parameters of + // the subset. Since the scanline decoder (and the swizzler) handle + // one row at a time, only the subsetting in the x-dimension matters. + fSwizzlerSubset.setXYWH(options.fSubset->x() - startX, 0, + options.fSubset->width(), options.fSubset->height()); + + // We will need a swizzler if libjpeg-turbo cannot provide the exact + // subset that we request. + if (startX != (uint32_t) options.fSubset->x() || + width != (uint32_t) options.fSubset->width()) { + this->initializeSwizzler(dstInfo, options); + } + } + + // Make sure we have a swizzler if we are converting from CMYK. + if (!fSwizzler && JCS_CMYK == fDecoderMgr->dinfo()->out_color_space) { + this->initializeSwizzler(dstInfo, options); + } +#else // We will need a swizzler if we are performing a subset decode or // converting from CMYK. J_COLOR_SPACE colorSpace = fDecoderMgr->dinfo()->out_color_space; if (options.fSubset || JCS_CMYK == colorSpace || JCS_RGB == colorSpace) { this->initializeSwizzler(dstInfo, options); } +#endif return kSuccess; } diff --git a/src/codec/SkJpegCodec.h b/src/codec/SkJpegCodec.h index 06685cf..bb5ce75 100644 --- a/src/codec/SkJpegCodec.h +++ b/src/codec/SkJpegCodec.h @@ -115,6 +115,10 @@ private: // scanline decoding SkAutoTMalloc fStorage; // Only used if sampling is needed uint8_t* fSrcRow; // Only used if sampling is needed + // libjpeg-turbo provides some subsetting. In the case that libjpeg-turbo + // cannot take the exact the subset that we need, we will use the swizzler + // to further subset the output from libjpeg-turbo. + SkIRect fSwizzlerSubset; SkAutoTDelete fSwizzler; typedef SkCodec INHERITED; -- 2.7.4