submodule/skia/src/effects/imagefilters/SkBlurImageFilter.cpp

   1 /*
   2  * Copyright 2011 The Android Open Source Project
   3  *
   4  * Use of this source code is governed by a BSD-style license that can be
   5  * found in the LICENSE file.
   6  */
   7
   8 #include <algorithm>
   9
  10 #include "include/core/SkBitmap.h"
  11 #include "include/core/SkTileMode.h"
  12 #include "include/effects/SkImageFilters.h"
  13 #include "include/private/SkColorData.h"
  14 #include "include/private/SkTFitsIn.h"
  15 #include "include/private/SkTPin.h"
  16 #include "include/private/SkVx.h"
  17 #include "src/core/SkArenaAlloc.h"
  18 #include "src/core/SkAutoPixmapStorage.h"
  19 #include "src/core/SkGpuBlurUtils.h"
  20 #include "src/core/SkImageFilter_Base.h"
  21 #include "src/core/SkOpts.h"
  22 #include "src/core/SkReadBuffer.h"
  23 #include "src/core/SkSpecialImage.h"
  24 #include "src/core/SkWriteBuffer.h"
  25
  26 #if SK_SUPPORT_GPU
  27 #include "src/gpu/ganesh/GrTextureProxy.h"
  28 #include "src/gpu/ganesh/SkGr.h"
  29 #if SK_GPU_V1
  30 #include "src/gpu/ganesh/v1/SurfaceDrawContext_v1.h"
  31 #endif // SK_GPU_V1
  32 #endif // SK_SUPPORT_GPU
  33
  34 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
  35     #include <immintrin.h>
  36     #define SK_PREFETCH(ptr) _mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0)
  37 #elif defined(__GNUC__)
  38     #define SK_PREFETCH(ptr) __builtin_prefetch(ptr)
  39 #else
  40     #define SK_PREFETCH(ptr)
  41 #endif
  42
  43 namespace {
  44
  // Image filter node that stores a per-axis blur sigma and a tile mode. It is constructed with
  // exactly one input filter and an optional crop rect, both of which are forwarded to
  // SkImageFilter_Base.
  45 class SkBlurImageFilter final : public SkImageFilter_Base {
  46 public:
         // sigmaX/sigmaY and tileMode are stored exactly as given; no clamping or validation is
         // done in the constructor.
  47     SkBlurImageFilter(SkScalar sigmaX, SkScalar sigmaY,  SkTileMode tileMode,
  48                       sk_sp<SkImageFilter> input, const SkRect* cropRect)
  49             : INHERITED(&input, 1, cropRect)
  50             , fSigma{sigmaX, sigmaY}
  51             , fTileMode(tileMode) {}
  52
  53     SkRect computeFastBounds(const SkRect&) const override;
  54
  55 protected:
         // Serializes the base-class state followed by both sigmas and the tile mode.
  56     void flatten(SkWriteBuffer&) const override;
  57     sk_sp<SkSpecialImage> onFilterImage(const Context&, SkIPoint* offset) const override;
  58     SkIRect onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
  59                                MapDirection, const SkIRect* inputRect) const override;
  60
  61 private:
         // The registration function needs access to the private CreateProc hook.
  62     friend void ::SkRegisterBlurImageFilterFlattenable();
  63     SK_FLATTENABLE_HOOKS(SkBlurImageFilter)
  64
         // The GPU implementation is only declared when GPU support is compiled in.
  65 #if SK_SUPPORT_GPU
  66     sk_sp<SkSpecialImage> gpuFilter(
  67             const Context& ctx, SkVector sigma,
  68             const sk_sp<SkSpecialImage> &input,
  69             SkIRect inputBounds, SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const;
  70 #endif
  71
         // Blur sigma as passed to the constructor: fWidth holds sigmaX, fHeight holds sigmaY.
  72     SkSize     fSigma;
  73     SkTileMode fTileMode;
  74
  75     using INHERITED = SkImageFilter_Base;
  76 };
  77
  78 } // end namespace
  79
  // Factory for the blur image filter. When both sigmas are below SK_ScalarNearlyZero (which
  // also covers negative sigmas) and no crop rect is supplied, no blur node is created and the
  // input filter is returned as-is. In every other case a new SkBlurImageFilter is returned.
  80 sk_sp<SkImageFilter> SkImageFilters::Blur(
  81         SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, sk_sp<SkImageFilter> input,
  82         const CropRect& cropRect) {
  83     if (sigmaX < SK_ScalarNearlyZero && sigmaY < SK_ScalarNearlyZero && !cropRect) {
  84         return input;
  85     }
  86     return sk_sp<SkImageFilter>(
  87           new SkBlurImageFilter(sigmaX, sigmaY, tileMode, input, cropRect));
  88 }
  89
  // Registers SkBlurImageFilter with the flattenable registry so it can be deserialized.
  90 void SkRegisterBlurImageFilterFlattenable() {
  91     SK_REGISTER_FLATTENABLE(SkBlurImageFilter);
         // Also map the name "SkBlurImageFilterImpl" to the same factory.
         // NOTE(review): presumably this keeps data serialized under an older class name
         // readable - confirm before removing.
  92     SkFlattenable::Register("SkBlurImageFilterImpl", SkBlurImageFilter::CreateProc);
  93 }
  94
  // Deserialization factory. Reads the fields in the same order flatten() writes them: the
  // common image-filter state, sigmaX, sigmaY, then the tile mode. The filter is rebuilt through
  // SkImageFilters::Blur(), so the result is whatever that factory returns for these values.
  95 sk_sp<SkFlattenable> SkBlurImageFilter::CreateProc(SkReadBuffer& buffer) {
  96     SK_IMAGEFILTER_UNFLATTEN_COMMON(common, 1);
  97     SkScalar sigmaX = buffer.readScalar();
  98     SkScalar sigmaY = buffer.readScalar();
         // The tile mode is read with SkTileMode::kLastTileMode as its upper limit.
  99     SkTileMode tileMode = buffer.read32LE(SkTileMode::kLastTileMode);
 100     return SkImageFilters::Blur(
 101           sigmaX, sigmaY, tileMode, common.getInput(0), common.cropRect());
 102 }
 103
 // Serializes this filter: the base-class state first, then sigmaX, sigmaY and the tile mode
 // (written as an int). CreateProc() reads the values back in the same order.
 104 void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const {
 105     this->INHERITED::flatten(buffer);
 106     buffer.writeScalar(fSigma.fWidth);
 107     buffer.writeScalar(fSigma.fHeight);
 108
 109     SkASSERT(fTileMode <= SkTileMode::kLastTileMode);
 110     buffer.writeInt(static_cast<int>(fTileMode));
 111 }
 112
 113 ///////////////////////////////////////////////////////////////////////////////
 114
 115 namespace {
 116 // Returns the box-filter window size, in pixels, for the given sigma. The formula is defined by
 117 // the SVG spec: https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement
     //     window = floor(sigma * 3 * sqrt(2 * pi) / 4 + 0.5)
     // The result is never smaller than 1.
 118 int calculate_window(double sigma) {
 119     auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * SK_DoublePI) / 4 + 0.5));
 120     return std::max(1, possibleWindow);
 121 }
 122
 123 // This rather arbitrary-looking value results in a maximum box blur kernel size
 124 // of 1000 pixels on the raster path, which matches the WebKit and Firefox
 125 // implementations. Since the GPU path does not compute a box blur, putting
 126 // the limit on sigma ensures consistent behaviour between the GPU and
 127 // raster paths.
 128 static constexpr SkScalar kMaxSigma = 532.f;
 129
     // Maps the filter's local-space sigma through the CTM and returns a per-axis sigma that is
     // non-negative, at most kMaxSigma, and finite.
 130 static SkVector map_sigma(const SkSize& localSigma, const SkMatrix& ctm) {
 131     SkVector sigma = SkVector::Make(localSigma.width(), localSigma.height());
 132     ctm.mapVectors(&sigma, 1);
         // The mapped vector may have negative components; only the magnitude matters.
 133     sigma.fX = std::min(SkScalarAbs(sigma.fX), kMaxSigma);
 134     sigma.fY = std::min(SkScalarAbs(sigma.fY), kMaxSigma);
 135     // Disable blurring on axes that were never finite, or became non-finite after mapping by ctm.
         // NOTE(review): std::min() above already turns an infinite magnitude into kMaxSigma, so
         // it looks like only NaN can still be non-finite at this point - confirm that clamping
         // infinity (instead of disabling the blur) is intended.
 136     if (!SkScalarIsFinite(sigma.fX)) {
 137         sigma.fX = 0.f;
 138     }
 139     if (!SkScalarIsFinite(sigma.fY)) {
 140         sigma.fY = 0.f;
 141     }
 142     return sigma;
 143 }
 144
 145
 // Base class for a blur pass over a single scanline. blur() takes care of aligning the source
 // and destination ranges and of the edges; subclasses supply the actual filter through
 // startBlur() and blurSegment().
 146 class Pass {
 147 public:
         // border is subtracted from the source coordinates in blur(); see the subclasses for how
         // it is derived from the window size.
 148     explicit Pass(int border) : fBorder(border) {}
 149     virtual ~Pass() = default;
 150
         // Blurs one scanline. All indices are in pixels, with the first dst pixel at index 0.
         //   srcLeft, srcRight  - extent of the source pixels; both are shifted by the border
         //                        before being compared with dst indices.
         //   dstRight           - index at which writing to dst stops (exclusive).
         //   src, dst           - first pixel of the source / destination scanline.
         //   srcStride,         - distance between consecutive pixels of the scanline, in
         //   dstStride            uint32_t units.
 151     void blur(int srcLeft, int srcRight, int dstRight,
 152               const uint32_t* src, int srcStride,
 153               uint32_t* dst, int dstStride) {
 154         this->startBlur();
 155
 156         auto srcStart = srcLeft - fBorder,
 157                 srcEnd   = srcRight - fBorder,
 158                 dstEnd   = dstRight,
 159                 srcIdx   = srcStart,
 160                 dstIdx   = 0;
 161
 162         const uint32_t* srcCursor = src;
 163         uint32_t* dstCursor = dst;
 164
 165         if (dstIdx < srcIdx) {
 166             // The destination pixels are not affected by the src pixels,
 167             // change to zero as per the spec.
 168             // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro
                 // NOTE(review): this loop runs up to srcIdx and is not bounded by dstEnd; it
                 // assumes srcLeft - border <= dstRight - confirm against the callers.
 169             while (dstIdx < srcIdx) {
 170                 *dstCursor = 0;
 171                 dstCursor += dstStride;
 172                 SK_PREFETCH(dstCursor);
 173                 dstIdx++;
 174             }
 175         } else if (srcIdx < dstIdx) {
 176             // The edge of the source is before the edge of the destination. Calculate the sums for
 177             // the pixels before the start of the destination.
 178             if (int commonEnd = std::min(dstIdx, srcEnd); srcIdx < commonEnd) {
 179                 // Preload the blur with values from src before dst is entered.
 180                 int n = commonEnd - srcIdx;
 181                 this->blurSegment(n, srcCursor, srcStride, nullptr, 0);
 182                 srcIdx += n;
 183                 srcCursor += n * srcStride;
 184             }
 185             if (srcIdx < dstIdx) {
 186                 // The weird case where src is out of pixels before dst is even started.
                     // Advance the filter without reading src and without writing dst.
 187                 int n = dstIdx - srcIdx;
 188                 this->blurSegment(n, nullptr, 0, nullptr, 0);
 189                 srcIdx += n;
 190             }
 191         }
 192
 193         // Both srcIdx and dstIdx are in sync now, and can run in a 1:1 fashion. This is the
 194         // normal mode of operation.
 195         SkASSERT(srcIdx == dstIdx);
 196         if (int commonEnd = std::min(dstEnd, srcEnd); dstIdx < commonEnd) {
 197             int n = commonEnd - dstIdx;
 198             this->blurSegment(n, srcCursor, srcStride, dstCursor, dstStride);
 199             srcCursor += n * srcStride;
 200             dstCursor += n * dstStride;
 201             dstIdx += n;
 202             srcIdx += n;
 203         }
 204
 205         // Drain the remaining blur values into dst assuming 0's for the leading edge.
 206         if (dstIdx < dstEnd) {
 207             int n = dstEnd - dstIdx;
 208             this->blurSegment(n, nullptr, 0, dstCursor, dstStride);
 209         }
 210     }
 211
 212 protected:
         // Called once at the start of every blur() call, before any blurSegment() call.
 213     virtual void startBlur() = 0;
         // Advances the filter by n pixels. blur() passes a null src when there is no source
         // pixel to read and a null dst when the result must not be written.
 214     virtual void blurSegment(
 215             int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) = 0;
 216
 217 private:
 218     const int fBorder;
 219 };
 220
 // Abstract factory for Pass objects. It remembers the window size it was created with and
 // reports how much scratch memory a Pass built from it needs.
 221 class PassMaker {
 222 public:
 223     explicit PassMaker(int window) : fWindow{window} {}
 224     virtual ~PassMaker() = default;
         // Creates a Pass that uses buffer as its scratch memory; alloc is the arena handed to
         // the concrete pass factory.
         // NOTE(review): buffer is expected to hold at least bufferSizeBytes() bytes - confirm
         // with the callers.
 225     virtual Pass* makePass(void* buffer, SkArenaAlloc* alloc) const = 0;
         // Number of bytes of scratch memory required by makePass().
 226     virtual size_t bufferSizeBytes() const = 0;
 227     int window() const {return fWindow;}
 228
 229 private:
 230     const int fWindow;
 231 };
 232
 233 // Implement a scanline processor that uses a three-box filter to approximate a Gaussian blur.
 234 // The GaussPass is limited to processing sigmas < 135.
 235 class GaussPass final : public Pass {
 236 public:
 237     // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
 238     // using the Gauss filter. It also limits the size of buffers used to hold intermediate values.
 239     // Explanation of maximums:
 240     //   sum0 = window * 255
 241     //   sum1 = window * sum0 -> window * window * 255
 242     //   sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255
 243     //
 244     //   The value window^3 * 255 must fit in a uint32_t. So,
 245     //      window^3 < 2^32. window = 255.
 246     //
 247     //   window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
 248     //   For window <= 255, the largest value for sigma is 136.
         //
         // NOTE(review): the check below rejects window >= 255, so with the formula above only
         // sigmas below roughly 135.4 are accepted; the 135 and 136 quoted in these comments are
         // approximations of that limit.
         //
         // Returns a PassMaker allocated from alloc, or nullptr when the window for sigma is too
         // large for this pass.
 249     static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
 250         SkASSERT(0 <= sigma);
 251         int window = calculate_window(sigma);
 252         if (255 <= window) {
 253             return nullptr;
 254         }
 255
 256         class Maker : public PassMaker {
 257         public:
 258             explicit Maker(int window) : PassMaker{window} {}
 259             Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
 260                 return GaussPass::Make(this->window(), buffer, alloc);
 261             }
 262
 263             size_t bufferSizeBytes() const override {
 264                 int window = this->window();
 265                 size_t onePassSize = window - 1;
 266                 // If the window is odd, then there is an obvious middle element. For even sizes
 267                 // 2 passes are shifted, and the last pass has an extra element. Like this:
 268                 //       S
 269                 //    aaaAaa
 270                 //     bbBbbb
 271                 //    cccCccc
 272                 //       D
 273                 size_t bufferCount = (window & 1) == 1 ? 3 * onePassSize : 3 * onePassSize + 1;
 274                 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
 275             }
 276         };
 277
 278         return alloc->make<Maker>(window);
 279     }
 280
         // Builds a GaussPass for the given window. buffers is split into the three circular
         // buffers, laid out back to back in the same way Maker::bufferSizeBytes() counts them.
 281     static GaussPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
 282         // We don't need to store the trailing edge pixel in the buffer;
 283         int passSize = window - 1;
 284         skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
 285         skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
 286         skvx::Vec<4, uint32_t>* buffer2 = buffer1 + passSize;
 287         // If the window is odd just one buffer is needed, but if it's even, then there is one
 288         // more element on that pass.
 289         skvx::Vec<4, uint32_t>* buffersEnd = buffer2 + ((window & 1) ? passSize : passSize + 1);
 290
 291         // Calculating the border is tricky. The border is the distance in pixels between the first
 292         // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
 293         // I will go through the odd case which is simpler, and then through the even case. Given a
 294         // stack of filters seven wide for the odd case of three passes.
 295         //
 296         //        S
 297         //     aaaAaaa
 298         //     bbbBbbb
 299         //     cccCccc
 300         //        D
 301         //
 302         // The furthest changed pixel is when the filters are in the following configuration.
 303         //
 304         //                 S
 305         //           aaaAaaa
 306         //        bbbBbbb
 307         //     cccCccc
 308         //        D
 309         //
 310         // The A pixel is calculated using the value S, the B uses A, and the C uses B, and
 311         // finally D is C. So, with a window size of seven the border is nine. In the odd case, the
 312         // border is 3*((window - 1)/2).
 313         //
 314         // For even cases the filter stack is more complicated. The spec specifies two passes
 315         // of even filters and a final pass of odd filters. A stack for a width of six looks like
 316         // this.
 317         //
 318         //       S
 319         //    aaaAaa
 320         //     bbBbbb
 321         //    cccCccc
 322         //       D
 323         //
 324         // The furthest pixel looks like this.
 325         //
 326         //               S
 327         //          aaaAaa
 328         //        bbBbbb
 329         //    cccCccc
 330         //       D
 331         //
 332         // For a window of six, the border value is eight. In the even case the border is 3 *
 333         // (window/2) - 1.
 334         int border = (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1;
 335
 336         // If the window is odd then the divisor is just window ^ 3 otherwise,
 337         // it is window * window * (window + 1) = window ^ 3 + window ^ 2;
 338         int window2 = window * window;
 339         int window3 = window2 * window;
 340         int divisor = (window & 1) == 1 ? window3 : window3 + window2;
 341         return alloc->make<GaussPass>(buffer0, buffer1, buffer2, buffersEnd, border, divisor);
 342     }
 343
         // buffer0..buffersEnd delimit the three back-to-back circular buffers: [buffer0, buffer1),
         // [buffer1, buffer2) and [buffer2, buffersEnd). divisor is the value each final sum is
         // divided by to produce an output channel.
 344     GaussPass(skvx::Vec<4, uint32_t>* buffer0,
 345               skvx::Vec<4, uint32_t>* buffer1,
 346               skvx::Vec<4, uint32_t>* buffer2,
 347               skvx::Vec<4, uint32_t>* buffersEnd,
 348               int border,
 349               int divisor)
 350         : Pass{border}
 351         , fBuffer0{buffer0}
 352         , fBuffer1{buffer1}
 353         , fBuffer2{buffer2}
 354         , fBuffersEnd{buffersEnd}
 355         , fDivider(divisor) {}
 356
 357 private:
         // Resets the running sums, zeroes all three circular buffers and rewinds the cursors.
 358     void startBlur() override {
 359         skvx::Vec<4, uint32_t> zero = {0u, 0u, 0u, 0u};
 360         zero.store(fSum0);
 361         zero.store(fSum1);
             // sum2 starts at fDivider.half() rather than zero.
             // NOTE(review): presumably this makes the later division round to nearest - confirm
             // against skvx::ScaledDividerU32.
 362         auto half = fDivider.half();
 363         skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum2);
 364         sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
 365
 366         fBuffer0Cursor = fBuffer0;
 367         fBuffer1Cursor = fBuffer1;
 368         fBuffer2Cursor = fBuffer2;
 369     }
 370
 371     // GaussPass implements the common three pass box filter approximation of Gaussian blur,
 372     // but combines all three passes into a single pass. This approach is facilitated by three
 373     // circular buffers the width of the window which track values for trailing edges of each of
 374     // the three passes. This allows the algorithm to use more precision in the calculation
 375     // because the values are not rounded each pass. And this implementation also avoids a trap
 376     // that's easy to fall into resulting in blending in too many zeroes near the edge.
 377     //
 378     // In general, a window sum has the form:
 379     //     sum_n+1 = sum_n + leading_edge - trailing_edge.
 380     // If instead we do the subtraction at the end of the previous iteration, we can just
 381     // calculate the sums instead of having to do the subtractions too.
 382     //
 383     //      In previous iteration:
 384     //      sum_n+1 = sum_n - trailing_edge.
 385     //
 386     //      In this iteration:
 387     //      sum_n+1 = sum_n + leading_edge.
 388     //
 389     // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
 390     // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
 391     // three passes at the same time has the form:
 392     //
 393     //    sum0_n+1 = sum0_n + leading edge
 394     //    sum1_n+1 = sum1_n + sum0_n+1
 395     //    sum2_n+1 = sum2_n + sum1_n+1
 396     //
 397     //    sum2_n+1 / window^3 is the new value of the destination pixel.
 398     //
 399     // Reduce the sums by the trailing edges which were stored in the circular buffers for the
 400     // next go around. This is the case for odd sized windows; for even windows the third
 401     // circular buffer is one larger than the first two circular buffers.
 402     //
 403     //    sum2_n+2 = sum2_n+1 - buffer2[i];
 404     //    buffer2[i] = sum1;
 405     //    sum1_n+2 = sum1_n+1 - buffer1[i];
 406     //    buffer1[i] = sum0;
 407     //    sum0_n+2 = sum0_n+1 - buffer0[i];
 408     //    buffer0[i] = leading edge
 409     void blurSegment(
 410             int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
             // Work on local copies of the filter state; it is written back at the end.
 411         skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
 412         skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
 413         skvx::Vec<4, uint32_t>* buffer2Cursor = fBuffer2Cursor;
 414         skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
 415         skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
 416         skvx::Vec<4, uint32_t> sum2 = skvx::Vec<4, uint32_t>::Load(fSum2);
 417
 418         // Given an expanded input pixel, move the window ahead using the leadingEdge value.
 419         auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
 420             sum0 += leadingEdge;
 421             sum1 += sum0;
 422             sum2 += sum1;
 423
 424             skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum2);
 425
                 // Remove the trailing edges and record the new values; each cursor wraps around
                 // at the end of its own circular buffer.
 426             sum2 -= *buffer2Cursor;
 427             *buffer2Cursor = sum1;
 428             buffer2Cursor = (buffer2Cursor + 1) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2;
 429             sum1 -= *buffer1Cursor;
 430             *buffer1Cursor = sum0;
 431             buffer1Cursor = (buffer1Cursor + 1) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1;
 432             sum0 -= *buffer0Cursor;
 433             *buffer0Cursor = leadingEdge;
 434             buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
 435
 436             return skvx::cast<uint8_t>(blurred);
 437         };
 438
             // Expands the four 8-bit channels of one source pixel to 32 bits each.
 439         auto loadEdge = [&](const uint32_t* srcCursor) {
 440             return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
 441         };
 442
             // Four cases, depending on whether there is a source pixel to read and whether the
             // result has to be written. A missing source pixel is fed in as zero.
 443         if (!src && !dst) {
 444             while (n --> 0) {
 445                 (void)processValue(0);
 446             }
 447         } else if (src && !dst) {
 448             while (n --> 0) {
 449                 (void)processValue(loadEdge(src));
 450                 src += srcStride;
 451             }
 452         } else if (!src && dst) {
 453             while (n --> 0) {
 454                 processValue(0u).store(dst);
 455                 dst += dstStride;
 456             }
 457         } else if (src && dst) {
 458             while (n --> 0) {
 459                 processValue(loadEdge(src)).store(dst);
 460                 src += srcStride;
 461                 dst += dstStride;
 462             }
 463         }
 464
 465         // Store the state
 466         fBuffer0Cursor = buffer0Cursor;
 467         fBuffer1Cursor = buffer1Cursor;
 468         fBuffer2Cursor = buffer2Cursor;
 469
 470         sum0.store(fSum0);
 471         sum1.store(fSum1);
 472         sum2.store(fSum2);
 473     }
 474
         // Start of each circular buffer and the end of the last one; fixed at construction.
 475     skvx::Vec<4, uint32_t>* const fBuffer0;
 476     skvx::Vec<4, uint32_t>* const fBuffer1;
 477     skvx::Vec<4, uint32_t>* const fBuffer2;
 478     skvx::Vec<4, uint32_t>* const fBuffersEnd;
 479     const skvx::ScaledDividerU32 fDivider;
 480
 481     // blur state
         // The sums are kept as raw bytes and accessed through Vec::Load() / store().
 482     char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
 483     char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
 484     char fSum2[sizeof(skvx::Vec<4, uint32_t>)];
 485     skvx::Vec<4, uint32_t>* fBuffer0Cursor;
 486     skvx::Vec<4, uint32_t>* fBuffer1Cursor;
 487     skvx::Vec<4, uint32_t>* fBuffer2Cursor;
 488 };
 489
 490 // Implement a scanline processor that uses a two-box filter to approximate a Tent filter.
 491 // The TentPass is limited to processing sigmas < 2183.
 492 class TentPass final : public Pass {
 493 public:
 494     // NB 2183 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
 495     // using the Tent filter. It also limits the size of buffers used to hold intermediate values.
 496     // Explanation of maximums:
 497     //   sum0 = window * 255
 498     //   sum1 = window * sum0 -> window * window * 255
 499     //
 500     //   The value window^2 * 255 must fit in a uint32_t. So,
 501     //      window^2 < 2^32. window = 4104.
 502     //
 503     //   window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
 504     //   For window <= 4104, the largest value for sigma is 2183.
         //
         // NOTE(review): 2183 is the sigma whose Gaussian window reaches 4104. The tent window
         // computed below is 3/2 of the Gaussian window, so the largest sigma this function
         // actually accepts appears to be lower (roughly 1455) - confirm.
         //
         // Returns a PassMaker allocated from alloc, or nullptr when the tent window for sigma is
         // 4104 or larger.
 505     static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
 506         SkASSERT(0 <= sigma);
 507         int gaussianWindow = calculate_window(sigma);
 508         // This is a naive method of using the window size for the Gaussian blur to calculate the
 509         // window size for the Tent blur. This seems to work well in practice.
 510         //
 511         // We can use a single pixel to generate the effective blur area given a window size. For
 512         // the Gaussian blur this is 3 * window size. For the Tent filter this is 2 * window size.
 513         int tentWindow = 3 * gaussianWindow / 2;
 514         if (tentWindow >= 4104) {
 515             return nullptr;
 516         }
 517
 518         class Maker : public PassMaker {
 519         public:
 520             explicit Maker(int window) : PassMaker{window} {}
 521             Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
 522                 return TentPass::Make(this->window(), buffer, alloc);
 523             }
 524
 525             size_t bufferSizeBytes() const override {
 526                 size_t onePassSize = this->window() - 1;
 527                 // Each of the two passes needs window - 1 elements, whether the window is odd or
 528                 // even. For even sizes the two passes are shifted against each other. Like this:
 529                 //       S
 530                 //    aaaAaa
 531                 //     bbBbbb
 532                 //       D
 533                 size_t bufferCount = 2 * onePassSize;
 534                 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
 535             }
 536         };
 537
 538         return alloc->make<Maker>(tentWindow);
 539     }
 540
         // Builds a TentPass for the given window, splitting buffers into two back-to-back
         // circular buffers of window - 1 elements each. Returns nullptr for windows above 4104.
         // NOTE(review): MakeMaker() rejects tentWindow >= 4104 while this check uses > 4104, so
         // a window of exactly 4104 is only reachable by calling Make() directly - confirm which
         // bound is intended.
 541     static TentPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
 542         if (window > 4104) {
 543             return nullptr;
 544         }
 545
 546         // We don't need to store the trailing edge pixel in the buffer;
 547         int passSize = window - 1;
 548         skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
 549         skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
 550         skvx::Vec<4, uint32_t>* buffersEnd = buffer1 + passSize;
 551
 552         // Calculating the border is tricky. The border is the distance in pixels between the first
 553         // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
 554         // I will go through the odd case which is simpler, and then through the even case. Given a
 555         // stack of filters seven wide for the odd case of two passes.
 556         //
 557         //        S
 558         //     aaaAaaa
 559         //     bbbBbbb
 560         //        D
 561         //
 562         // The furthest changed pixel is when the filters are in the following configuration.
 563         //
 564         //              S
 565         //        aaaAaaa
 566         //     bbbBbbb
 567         //        D
 568         //
 569         // The A pixel is calculated using the value S, the B uses A, and the D uses B.
 570         // So, with a window size of seven the border is six. In the odd case, the border is
 571         // window - 1.
 572         //
 573         // For even cases the filter stack is more complicated. It uses two passes
 574         // of even filters offset from each other. A stack for a width of six looks like
 575         // this.
 576         //
 577         //       S
 578         //    aaaAaa
 579         //     bbBbbb
 580         //       D
 581         //
 582         // The furthest pixel looks like this.
 583         //
 584         //            S
 585         //       aaaAaa
 586         //     bbBbbb
 587         //       D
 588         //
 589         // For a window of six, the border value is 5. In the even case the border is
 590         // window - 1.
 591         int border = window - 1;
 592
             // Two stacked box filters of the same width: the final sum is divided by window^2.
 593         int divisor = window * window;
 594         return alloc->make<TentPass>(buffer0, buffer1, buffersEnd, border, divisor);
 595     }
 596
         // buffer0..buffersEnd delimit the two back-to-back circular buffers: [buffer0, buffer1)
         // and [buffer1, buffersEnd). divisor is the value each final sum is divided by.
 597     TentPass(skvx::Vec<4, uint32_t>* buffer0,
 598              skvx::Vec<4, uint32_t>* buffer1,
 599              skvx::Vec<4, uint32_t>* buffersEnd,
 600              int border,
 601              int divisor)
 602          : Pass{border}
 603          , fBuffer0{buffer0}
 604          , fBuffer1{buffer1}
 605          , fBuffersEnd{buffersEnd}
 606          , fDivider(divisor) {}
 607
 608 private:
         // Resets the running sums, zeroes both circular buffers and rewinds the cursors.
 609     void startBlur() override {
 610         skvx::Vec<4, uint32_t>{0u, 0u, 0u, 0u}.store(fSum0);
             // sum1 starts at fDivider.half() rather than zero.
             // NOTE(review): presumably this makes the later division round to nearest - confirm
             // against skvx::ScaledDividerU32.
 611         auto half = fDivider.half();
 612         skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum1);
 613         sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
 614
 615         fBuffer0Cursor = fBuffer0;
 616         fBuffer1Cursor = fBuffer1;
 617     }
 618
 619     // TentPass implements the common two pass box filter approximation of Tent filter,
 620     // but combines both passes into a single pass. This approach is facilitated by two
 621     // circular buffers the width of the window which track values for trailing edges of each of
 622     // both passes. This allows the algorithm to use more precision in the calculation
 623     // because the values are not rounded each pass. And this implementation also avoids a trap
 624     // that's easy to fall into resulting in blending in too many zeroes near the edge.
 625     //
 626     // In general, a window sum has the form:
 627     //     sum_n+1 = sum_n + leading_edge - trailing_edge.
 628     // If instead we do the subtraction at the end of the previous iteration, we can just
 629     // calculate the sums instead of having to do the subtractions too.
 630     //
 631     //      In previous iteration:
 632     //      sum_n+1 = sum_n - trailing_edge.
 633     //
 634     //      In this iteration:
 635     //      sum_n+1 = sum_n + leading_edge.
 636     //
 637     // Now we can stack both sums and do them at once. Sum0 gets its leading edge from the
 638     // actual data, and Sum1's leading edge is just Sum0. So, doing the
 639     // two passes at the same time has the form:
 640     //
 641     //    sum0_n+1 = sum0_n + leading edge
 642     //    sum1_n+1 = sum1_n + sum0_n+1
 643     //
 644     //    sum1_n+1 / window^2 is the new value of the destination pixel.
 645     //
 646     // Reduce the sums by the trailing edges which were stored in the circular buffers for the
 647     // next go around.
 648     //
 649     //    sum1_n+2 = sum1_n+1 - buffer1[i];
 650     //    buffer1[i] = sum0;
 651     //    sum0_n+2 = sum0_n+1 - buffer0[i];
 652     //    buffer0[i] = leading edge
 653     void blurSegment(
 654             int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
             // Work on local copies of the filter state; it is written back at the end.
 655         skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
 656         skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
 657         skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
 658         skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
 659
 660         // Given an expanded input pixel, move the window ahead using the leadingEdge value.
 661         auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
 662             sum0 += leadingEdge;
 663             sum1 += sum0;
 664
 665             skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum1);
 666
                 // Remove the trailing edges and record the new values; each cursor wraps around
                 // at the end of its own circular buffer.
 667             sum1 -= *buffer1Cursor;
 668             *buffer1Cursor = sum0;
 669             buffer1Cursor = (buffer1Cursor + 1) < fBuffersEnd ? buffer1Cursor + 1 : fBuffer1;
 670             sum0 -= *buffer0Cursor;
 671             *buffer0Cursor = leadingEdge;
 672             buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
 673
 674             return skvx::cast<uint8_t>(blurred);
 675         };
 676
             // Expands the four 8-bit channels of one source pixel to 32 bits each.
 677         auto loadEdge = [&](const uint32_t* srcCursor) {
 678             return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
 679         };
 680
             // Four cases, depending on whether there is a source pixel to read and whether the
             // result has to be written. A missing source pixel is fed in as zero.
 681         if (!src && !dst) {
 682             while (n --> 0) {
 683                 (void)processValue(0);
 684             }
 685         } else if (src && !dst) {
 686             while (n --> 0) {
 687                 (void)processValue(loadEdge(src));
 688                 src += srcStride;
 689             }
 690         } else if (!src && dst) {
 691             while (n --> 0) {
 692                 processValue(0u).store(dst);
 693                 dst += dstStride;
 694             }
 695         } else if (src && dst) {
 696             while (n --> 0) {
 697                 processValue(loadEdge(src)).store(dst);
 698                 src += srcStride;
 699                 dst += dstStride;
 700             }
 701         }
 702
 703         // Store the state
 704         fBuffer0Cursor = buffer0Cursor;
 705         fBuffer1Cursor = buffer1Cursor;
 706         sum0.store(fSum0);
 707         sum1.store(fSum1);
 708     }
 709
         // Start of each circular buffer and the end of the last one; fixed at construction.
 710     skvx::Vec<4, uint32_t>* const fBuffer0;
 711     skvx::Vec<4, uint32_t>* const fBuffer1;
 712     skvx::Vec<4, uint32_t>* const fBuffersEnd;
 713     const skvx::ScaledDividerU32 fDivider;
 714
 715     // blur state
         // The sums are kept as raw bytes and accessed through Vec::Load() / store().
 716     char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
 717     char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
 718     skvx::Vec<4, uint32_t>* fBuffer0Cursor;
 719     skvx::Vec<4, uint32_t>* fBuffer1Cursor;
 720 };
 721
 // Copies the srcBounds portion of input into a new raster image the size of dstBounds, without
 // blurring. Pixels of the destination that are not covered by the source are set to zero.
 // Returns nullptr when the input pixels cannot be read, when the input is not kN32, or when the
 // destination bitmap cannot be allocated.
 722 sk_sp<SkSpecialImage> copy_image_with_bounds(
 723         const SkImageFilter_Base::Context& ctx, const sk_sp<SkSpecialImage> &input,
 724         SkIRect srcBounds, SkIRect dstBounds) {
 725     SkBitmap inputBM;
 726     if (!input->getROPixels(&inputBM)) {
 727         return nullptr;
 728     }
 729
 730     if (inputBM.colorType() != kN32_SkColorType) {
 731         return nullptr;
 732     }
 733
         // NOTE(review): the result of extractSubset() is not checked; the row copy below assumes
         // src really covers srcBounds - confirm that callers guarantee this.
 734     SkBitmap src;
 735     inputBM.extractSubset(&src, srcBounds);
 736
 737     // Make everything relative to the destination bounds.
 738     srcBounds.offset(-dstBounds.x(), -dstBounds.y());
 739     dstBounds.offset(-dstBounds.x(), -dstBounds.y());
 740
 741     auto srcW = srcBounds.width(),
 742          dstW = dstBounds.width(),
 743          dstH = dstBounds.height();
 744
 745     SkImageInfo dstInfo = SkImageInfo::Make(dstW, dstH, inputBM.colorType(), inputBM.alphaType());
 746
 747     SkBitmap dst;
 748     if (!dst.tryAllocPixels(dstInfo)) {
 749         return nullptr;
 750     }
 751
 752     // There is no blurring to do, but we still need to copy the source while accounting for the
 753     // dstBounds. Remember that the src was intersected with the dst.
 754     int y = 0;
 755     size_t dstWBytes = dstW * sizeof(uint32_t);
         // Rows above the source: all zero.
 756     for (;y < srcBounds.top(); y++) {
 757         sk_bzero(dst.getAddr32(0, y), dstWBytes);
 758     }
 759
         // Rows that overlap the source: zeros on the left, the source row, zeros on the right.
 760     for (;y < srcBounds.bottom(); y++) {
 761         int x = 0;
 762         uint32_t* dstPtr = dst.getAddr32(0, y);
 763         for (;x < srcBounds.left(); x++) {
 764             *dstPtr++ = 0;
 765         }
 766
 767         memcpy(dstPtr, src.getAddr32(x - srcBounds.left(), y - srcBounds.top()),
 768                srcW * sizeof(uint32_t));
 769
 770         dstPtr += srcW;
 771         x += srcW;
 772
 773         for (;x < dstBounds.right(); x++) {
 774             *dstPtr++ = 0;
 775         }
 776     }
 777
         // Rows below the source: all zero.
 778     for (;y < dstBounds.bottom(); y++) {
 779         sk_bzero(dst.getAddr32(0, y), dstWBytes);
 780     }
 781
 782     return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
 783                                                           dstBounds.height()),
 784                                           dst, ctx.surfaceProps());
 785 }
 786
 787 // TODO: Implement CPU backend for different fTileMode.
 788 sk_sp<SkSpecialImage> cpu_blur(
 789         const SkImageFilter_Base::Context& ctx,
 790         SkVector sigma, const sk_sp<SkSpecialImage> &input,
 791         SkIRect srcBounds, SkIRect dstBounds) {
 792     // map_sigma limits sigma to 532 to match 1000px box filter limit of WebKit and Firefox.
 793     // Since this does not exceed the limits of the TentPass (2183), there won't be overflow when
 794     // computing a kernel over a pixel window filled with 255.
 795     static_assert(kMaxSigma <= 2183.0f);
 796
 797     SkSTArenaAlloc<1024> alloc;
 798     auto makeMaker = [&](double sigma) -> PassMaker* {
 799         SkASSERT(0 <= sigma && sigma <= 2183); // should be guaranteed after map_sigma
 800         if (PassMaker* maker = GaussPass::MakeMaker(sigma, &alloc)) {
 801             return maker;
 802         }
 803         if (PassMaker* maker = TentPass::MakeMaker(sigma, &alloc)) {
 804             return maker;
 805         }
 806         SK_ABORT("Sigma is out of range.");
 807     };
 808
 809     PassMaker* makerX = makeMaker(sigma.x());
 810     PassMaker* makerY = makeMaker(sigma.y());
 811
 812     if (makerX->window() <= 1 && makerY->window() <= 1) {
 813         return copy_image_with_bounds(ctx, input, srcBounds, dstBounds);
 814     }
 815
 816     SkBitmap inputBM;
 817
 818     if (!input->getROPixels(&inputBM)) {
 819         return nullptr;
 820     }
 821
 822     if (inputBM.colorType() != kN32_SkColorType) {
 823         return nullptr;
 824     }
 825
 826     SkBitmap src;
 827     inputBM.extractSubset(&src, srcBounds);
 828
 829     // Make everything relative to the destination bounds.
 830     srcBounds.offset(-dstBounds.x(), -dstBounds.y());
 831     dstBounds.offset(-dstBounds.x(), -dstBounds.y());
 832
 833     auto srcW = srcBounds.width(),
 834          srcH = srcBounds.height(),
 835          dstW = dstBounds.width(),
 836          dstH = dstBounds.height();
 837
 838     SkImageInfo dstInfo = inputBM.info().makeWH(dstW, dstH);
 839
 840     SkBitmap dst;
 841     if (!dst.tryAllocPixels(dstInfo)) {
 842         return nullptr;
 843     }
 844
 845     size_t bufferSizeBytes = std::max(makerX->bufferSizeBytes(), makerY->bufferSizeBytes());
 846     auto buffer = alloc.makeBytesAlignedTo(bufferSizeBytes, alignof(skvx::Vec<4, uint32_t>));
 847
 848     // Basic Plan: The three cases to handle
 849     // * Horizontal and Vertical - blur horizontally while copying values from the source to
 850     //     the destination. Then, do an in-place vertical blur.
 851     // * Horizontal only - blur horizontally copying values from the source to the destination.
 852     // * Vertical only - blur vertically copying values from the source to the destination.
 853
 854     // Default to vertical only blur case. If a horizontal blur is needed, then these values
 855     // will be adjusted while doing the horizontal blur.
 856     auto intermediateSrc = static_cast<uint32_t *>(src.getPixels());
 857     auto intermediateRowBytesAsPixels = src.rowBytesAsPixels();
 858     auto intermediateWidth = srcW;
 859
 860     // Because the border is calculated before the fork of the GPU/CPU path. The border is
 861     // the maximum of the two rendering methods. In the case where sigma is zero, then the
 862     // src and dst left values are the same. If sigma is small resulting in a window size of
 863     // 1, then border calculations add some pixels which will always be zero. Inset the
 864     // destination by those zero pixels. This case is very rare.
 865     auto intermediateDst = dst.getAddr32(srcBounds.left(), 0);
 866
 867     // The following code is executed very rarely, I have never seen it in a real web
 868     // page. If sigma is small but not zero then shared GPU/CPU border calculation
 869     // code adds extra pixels for the border. Just clear everything to clear those pixels.
 870     // This solution is overkill, but very simple.
 871     if (makerX->window() == 1 || makerY->window() == 1) {
 872         dst.eraseColor(0);
 873     }
 874
 875     if (makerX->window() > 1) {
 876         Pass* pass = makerX->makePass(buffer, &alloc);
 877         // Make int64 to avoid overflow in multiplication below.
 878         int64_t shift = srcBounds.top() - dstBounds.top();
 879
 880         // For the horizontal blur, starts part way down in anticipation of the vertical blur.
 881         // For a vertical sigma of zero shift should be zero. But, for small sigma,
 882         // shift may be > 0 but the vertical window could be 1.
 883         intermediateSrc = static_cast<uint32_t *>(dst.getPixels())
 884                           + (shift > 0 ? shift * dst.rowBytesAsPixels() : 0);
 885         intermediateRowBytesAsPixels = dst.rowBytesAsPixels();
 886         intermediateWidth = dstW;
 887         intermediateDst = static_cast<uint32_t *>(dst.getPixels());
 888
 889         const uint32_t* srcCursor = static_cast<uint32_t*>(src.getPixels());
 890         uint32_t* dstCursor = intermediateSrc;
 891         for (auto y = 0; y < srcH; y++) {
 892             pass->blur(srcBounds.left(), srcBounds.right(), dstBounds.right(),
 893                       srcCursor, 1, dstCursor, 1);
 894             srcCursor += src.rowBytesAsPixels();
 895             dstCursor += intermediateRowBytesAsPixels;
 896         }
 897     }
 898
 899     if (makerY->window() > 1) {
 900         Pass* pass = makerY->makePass(buffer, &alloc);
 901         const uint32_t* srcCursor = intermediateSrc;
 902         uint32_t* dstCursor = intermediateDst;
 903         for (auto x = 0; x < intermediateWidth; x++) {
 904             pass->blur(srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(),
 905                        srcCursor, intermediateRowBytesAsPixels,
 906                        dstCursor, dst.rowBytesAsPixels());
 907             srcCursor += 1;
 908             dstCursor += 1;
 909         }
 910     }
 911
 912     return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
 913                                                           dstBounds.height()),
 914                                           dst, ctx.surfaceProps());
 915 }
 916 }  // namespace
 917
 918 sk_sp<SkSpecialImage> SkBlurImageFilter::onFilterImage(const Context& ctx,
 919                                                        SkIPoint* offset) const {
 920     SkIPoint inputOffset = SkIPoint::Make(0, 0);
 921
 922     sk_sp<SkSpecialImage> input(this->filterInput(0, ctx, &inputOffset));
 923     if (!input) {
 924         return nullptr;
 925     }
 926
 927     SkIRect inputBounds = SkIRect::MakeXYWH(inputOffset.fX, inputOffset.fY,
 928                                             input->width(), input->height());
 929
 930     // Calculate the destination bounds.
 931     SkIRect dstBounds;
 932     if (!this->applyCropRect(this->mapContext(ctx), inputBounds, &dstBounds)) {
 933         return nullptr;
 934     }
 935     if (!inputBounds.intersect(dstBounds)) {
 936         return nullptr;
 937     }
 938
 939     // Save the offset in preparation to make all rectangles relative to the inputOffset.
 940     SkIPoint resultOffset = SkIPoint::Make(dstBounds.fLeft, dstBounds.fTop);
 941
 942     // Make all bounds relative to the inputOffset.
 943     inputBounds.offset(-inputOffset);
 944     dstBounds.offset(-inputOffset);
 945
 946     SkVector sigma = map_sigma(fSigma, ctx.ctm());
 947     SkASSERT(SkScalarIsFinite(sigma.x()) && sigma.x() >= 0.f && sigma.x() <= kMaxSigma &&
 948              SkScalarIsFinite(sigma.y()) && sigma.y() >= 0.f && sigma.y() <= kMaxSigma);
 949
 950     sk_sp<SkSpecialImage> result;
 951 #if SK_SUPPORT_GPU
 952     if (ctx.gpuBacked()) {
 953         // Ensure the input is in the destination's gamut. This saves us from having to do the
 954         // xform during the filter itself.
 955         input = ImageToColorSpace(input.get(), ctx.colorType(), ctx.colorSpace(),
 956                                   ctx.surfaceProps());
 957         result = this->gpuFilter(ctx, sigma, input, inputBounds, dstBounds, inputOffset,
 958                                  &resultOffset);
 959     } else
 960 #endif
 961     {
 962         result = cpu_blur(ctx, sigma, input, inputBounds, dstBounds);
 963     }
 964
 965     // Return the resultOffset if the blur succeeded.
 966     if (result != nullptr) {
 967         *offset = resultOffset;
 968     }
 969     return result;
 970 }
 971
 972 #if SK_SUPPORT_GPU
 973 sk_sp<SkSpecialImage> SkBlurImageFilter::gpuFilter(
 974         const Context& ctx, SkVector sigma, const sk_sp<SkSpecialImage> &input, SkIRect inputBounds,
 975         SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const {
 976 #if SK_GPU_V1
 977     if (SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.x()) &&
 978         SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.y())) {
 979         offset->fX = inputBounds.x() + inputOffset.fX;
 980         offset->fY = inputBounds.y() + inputOffset.fY;
 981         return input->makeSubset(inputBounds);
 982     }
 983
 984     auto context = ctx.getContext();
 985
 986     GrSurfaceProxyView inputView = input->view(context);
 987     if (!inputView.proxy()) {
 988         return nullptr;
 989     }
 990     SkASSERT(inputView.asTextureProxy());
 991
 992     // TODO (michaelludwig) - The color space choice is odd, should it just be ctx.refColorSpace()?
 993     dstBounds.offset(input->subset().topLeft());
 994     inputBounds.offset(input->subset().topLeft());
 995     auto sdc = SkGpuBlurUtils::GaussianBlur(
 996             context,
 997             std::move(inputView),
 998             SkColorTypeToGrColorType(input->colorType()),
 999             input->alphaType(),
1000             ctx.colorSpace() ? sk_ref_sp(input->getColorSpace()) : nullptr,
1001             dstBounds,
1002             inputBounds,
1003             sigma.x(),
1004             sigma.y(),
1005             fTileMode);
1006     if (!sdc) {
1007         return nullptr;
1008     }
1009
1010     return SkSpecialImage::MakeDeferredFromGpu(context,
1011                                                SkIRect::MakeSize(dstBounds.size()),
1012                                                kNeedNewImageUniqueID_SpecialImage,
1013                                                sdc->readSurfaceView(),
1014                                                sdc->colorInfo().colorType(),
1015                                                sk_ref_sp(input->getColorSpace()),
1016                                                ctx.surfaceProps());
1017 #else // SK_GPU_V1
1018     return nullptr;
1019 #endif // SK_GPU_V1
1020 }
1021 #endif
1022
1023 SkRect SkBlurImageFilter::computeFastBounds(const SkRect& src) const {
1024     SkRect bounds = this->getInput(0) ? this->getInput(0)->computeFastBounds(src) : src;
1025     bounds.outset(fSigma.width() * 3, fSigma.height() * 3);
1026     return bounds;
1027 }
1028
1029 SkIRect SkBlurImageFilter::onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
1030                                               MapDirection, const SkIRect* inputRect) const {
1031     SkVector sigma = map_sigma(fSigma, ctm);
1032     return src.makeOutset(SkScalarCeilToInt(sigma.x() * 3), SkScalarCeilToInt(sigma.y() * 3));
1033 }