2 * Copyright 2011 The Android Open Source Project
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
10 #include "include/core/SkBitmap.h"
11 #include "include/core/SkTileMode.h"
12 #include "include/effects/SkImageFilters.h"
13 #include "include/private/SkColorData.h"
14 #include "include/private/SkTFitsIn.h"
15 #include "include/private/SkTPin.h"
16 #include "include/private/SkVx.h"
17 #include "src/core/SkArenaAlloc.h"
18 #include "src/core/SkAutoPixmapStorage.h"
19 #include "src/core/SkGpuBlurUtils.h"
20 #include "src/core/SkImageFilter_Base.h"
21 #include "src/core/SkOpts.h"
22 #include "src/core/SkReadBuffer.h"
23 #include "src/core/SkSpecialImage.h"
24 #include "src/core/SkWriteBuffer.h"
27 #include "src/gpu/ganesh/GrTextureProxy.h"
28 #include "src/gpu/ganesh/SkGr.h"
30 #include "src/gpu/ganesh/v1/SurfaceDrawContext_v1.h"
32 #endif // SK_SUPPORT_GPU
34 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
35 #include <immintrin.h>
36 #define SK_PREFETCH(ptr) _mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0)
37 #elif defined(__GNUC__)
38 #define SK_PREFETCH(ptr) __builtin_prefetch(ptr)
40 #define SK_PREFETCH(ptr)
// Image filter node that blurs its single input. It stores an X/Y sigma pair and a tile mode.
45 class SkBlurImageFilter final : public SkImageFilter_Base {
// Passes the one input and the optional crop rect to the base class, then records the sigmas
// and the tile mode.
47 SkBlurImageFilter(SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode,
48 sk_sp<SkImageFilter> input, const SkRect* cropRect)
49 : INHERITED(&input, 1, cropRect)
50 , fSigma{sigmaX, sigmaY}
51 , fTileMode(tileMode) {}
// Fast bounds estimate; defined near the end of this file.
53 SkRect computeFastBounds(const SkRect&) const override;
// Serialization and filtering overrides; definitions appear later in this file.
56 void flatten(SkWriteBuffer&) const override;
57 sk_sp<SkSpecialImage> onFilterImage(const Context&, SkIPoint* offset) const override;
58 SkIRect onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
59 MapDirection, const SkIRect* inputRect) const override;
// The registration function is a friend; it references SkBlurImageFilter::CreateProc.
62 friend void ::SkRegisterBlurImageFilterFlattenable();
63 SK_FLATTENABLE_HOOKS(SkBlurImageFilter)
// GPU blur path, called from onFilterImage() when the context is GPU backed.
66 sk_sp<SkSpecialImage> gpuFilter(
67 const Context& ctx, SkVector sigma,
68 const sk_sp<SkSpecialImage> &input,
69 SkIRect inputBounds, SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const;
75 using INHERITED = SkImageFilter_Base;
// Public factory for the blur image filter.
80 sk_sp<SkImageFilter> SkImageFilters::Blur(
81 SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, sk_sp<SkImageFilter> input,
82 const CropRect& cropRect) {
// Degenerate request: both sigmas are below SK_ScalarNearlyZero and there is no crop rect.
// NOTE(review): the body of this branch is not visible in this view; presumably it returns
// `input` without wrapping it in a blur node. Confirm against the full file.
83 if (sigmaX < SK_ScalarNearlyZero && sigmaY < SK_ScalarNearlyZero && !cropRect) {
// Otherwise wrap the input in a new blur filter node.
86 return sk_sp<SkImageFilter>(
87 new SkBlurImageFilter(sigmaX, sigmaY, tileMode, input, cropRect));
// Registers the blur filter with the flattenable factory so serialized filters can be rebuilt.
90 void SkRegisterBlurImageFilterFlattenable() {
91 SK_REGISTER_FLATTENABLE(SkBlurImageFilter);
// Also register CreateProc under the name "SkBlurImageFilterImpl".
// NOTE(review): presumably an older serialized name kept for compatibility; confirm.
92 SkFlattenable::Register("SkBlurImageFilterImpl", SkBlurImageFilter::CreateProc);
// Deserializes a blur filter: reads the common image-filter data (one input), then sigmaX,
// sigmaY and the tile mode, in the same order flatten() writes them.
95 sk_sp<SkFlattenable> SkBlurImageFilter::CreateProc(SkReadBuffer& buffer) {
96 SK_IMAGEFILTER_UNFLATTEN_COMMON(common, 1);
97 SkScalar sigmaX = buffer.readScalar();
98 SkScalar sigmaY = buffer.readScalar();
// The tile mode is read with SkTileMode::kLastTileMode as the upper limit.
99 SkTileMode tileMode = buffer.read32LE(SkTileMode::kLastTileMode);
// Rebuild through the public factory.
100 return SkImageFilters::Blur(
101 sigmaX, sigmaY, tileMode, common.getInput(0), common.cropRect());
// Serializes the filter: base-class data first, then sigma width, sigma height and the tile
// mode as an int. CreateProc() reads the values back in this order.
104 void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const {
105 this->INHERITED::flatten(buffer);
106 buffer.writeScalar(fSigma.fWidth);
107 buffer.writeScalar(fSigma.fHeight);
// CreateProc() reads the tile mode with kLastTileMode as its limit, so assert the same here.
109 SkASSERT(fTileMode <= SkTileMode::kLastTileMode);
110 buffer.writeInt(static_cast<int>(fTileMode));
113 ///////////////////////////////////////////////////////////////////////////////
116 // This is defined by the SVG spec:
117 // https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement
// Returns the box-filter window width for the given sigma:
// floor(sigma * 3 * sqrt(2 * pi) / 4 + 0.5), but never less than 1.
118 int calculate_window(double sigma) {
119 auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * SK_DoublePI) / 4 + 0.5));
// A window of 1 is the smallest value ever returned.
120 return std::max(1, possibleWindow);
123 // This rather arbitrary-looking value results in a maximum box blur kernel size
124 // of 1000 pixels on the raster path, which matches the WebKit and Firefox
125 // implementations. Since the GPU path does not compute a box blur, putting
126 // the limit on sigma ensures consistent behaviour between the GPU and
128 static constexpr SkScalar kMaxSigma = 532.f;
// Maps the filter's local sigma through `ctm` as a vector, then takes the absolute value of
// each component and clamps it to kMaxSigma.
130 static SkVector map_sigma(const SkSize& localSigma, const SkMatrix& ctm) {
131 SkVector sigma = SkVector::Make(localSigma.width(), localSigma.height());
132 ctm.mapVectors(&sigma, 1);
133 sigma.fX = std::min(SkScalarAbs(sigma.fX), kMaxSigma);
134 sigma.fY = std::min(SkScalarAbs(sigma.fY), kMaxSigma);
135 // Disable blurring on axes that were never finite, or became non-finite after mapping by ctm.
// NOTE(review): the bodies of the two checks below are not visible in this view; per the
// comment above they presumably zero the affected component.
136 if (!SkScalarIsFinite(sigma.fX)) {
139 if (!SkScalarIsFinite(sigma.fY)) {
// `border` is stored in fBorder; blur() subtracts it from the source range before comparing
// that range with the destination range.
148 explicit Pass(int border) : fBorder(border) {}
// Virtual destructor: GaussPass and TentPass derive from Pass.
149 virtual ~Pass() = default;
// Blurs one scanline from `src` into `dst`. srcStride and dstStride are the element steps
// between consecutive pixels, so one routine serves both rows and columns (cpu_blur() passes
// 1 for rows and the row stride for columns). srcLeft, srcRight and dstRight are positions
// along the scanline; the source range is shifted back by fBorder.
// NOTE(review): the declarations of srcIdx, dstIdx and dstEnd are not visible in this view.
151 void blur(int srcLeft, int srcRight, int dstRight,
152 const uint32_t* src, int srcStride,
153 uint32_t* dst, int dstStride) {
156 auto srcStart = srcLeft - fBorder,
157 srcEnd = srcRight - fBorder,
162 const uint32_t* srcCursor = src;
163 uint32_t* dstCursor = dst;
165 if (dstIdx < srcIdx) {
166 // The destination pixels are not affected by the src pixels,
167 // change to zero as per the spec.
168 // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro
169 while (dstIdx < srcIdx) {
171 dstCursor += dstStride;
172 SK_PREFETCH(dstCursor);
175 } else if (srcIdx < dstIdx) {
176 // The edge of the source is before the edge of the destination. Calculate the sums for
177 // the pixels before the start of the destination.
178 if (int commonEnd = std::min(dstIdx, srcEnd); srcIdx < commonEnd) {
179 // Preload the blur with values from src before dst is entered.
180 int n = commonEnd - srcIdx;
// A null dst means the results of these n steps are not written anywhere.
181 this->blurSegment(n, srcCursor, srcStride, nullptr, 0);
183 srcCursor += n * srcStride;
185 if (srcIdx < dstIdx) {
186 // The weird case where src is out of pixels before dst is even started.
187 int n = dstIdx - srcIdx;
// Null src and null dst: advance the filter by n steps without reading or writing pixels.
188 this->blurSegment(n, nullptr, 0, nullptr, 0);
193 // Both srcIdx and dstIdx are in sync now, and can run in a 1:1 fashion. This is the
194 // normal mode of operation.
195 SkASSERT(srcIdx == dstIdx);
196 if (int commonEnd = std::min(dstEnd, srcEnd); dstIdx < commonEnd) {
197 int n = commonEnd - dstIdx;
198 this->blurSegment(n, srcCursor, srcStride, dstCursor, dstStride);
199 srcCursor += n * srcStride;
200 dstCursor += n * dstStride;
205 // Drain the remaining blur values into dst assuming 0's for the leading edge.
206 if (dstIdx < dstEnd) {
207 int n = dstEnd - dstIdx;
208 this->blurSegment(n, nullptr, 0, dstCursor, dstStride);
// Resets the running sums, circular buffers and cursors; see the overrides in GaussPass and
// TentPass.
213 virtual void startBlur() = 0;
// Advances the filter by n pixels. blur() passes a null `src` where there is no source pixel
// to read and a null `dst` where the result is not stored.
214 virtual void blurSegment(
215 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) = 0;
// A PassMaker remembers one window size and creates Pass objects for it.
223 explicit PassMaker(int window) : fWindow{window} {}
224 virtual ~PassMaker() = default;
// Creates a Pass that uses `buffer` as scratch storage. cpu_blur() sizes that buffer from
// bufferSizeBytes().
225 virtual Pass* makePass(void* buffer, SkArenaAlloc* alloc) const = 0;
// Number of scratch bytes a Pass created by this maker needs.
226 virtual size_t bufferSizeBytes() const = 0;
// Window size this maker was constructed with.
227 int window() const {return fWindow;}
233 // Implement a scanline processor that uses a three-box filter to approximate a Gaussian blur.
234 // The GaussPass is limited to processing sigmas < 135.
235 class GaussPass final : public Pass {
237 // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
238 // using the Gauss filter. It also limits the size of buffers used to hold intermediate values.
239 // Explanation of maximums:
240 // sum0 = window * 255
241 // sum1 = window * sum0 -> window * window * 255
242 // sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255
244 // The value window^3 * 255 must fit in a uint32_t. So,
245 // window^3 < 2^32. window = 255.
247 // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
248 // For window <= 255, the largest value for sigma is 136.
// Builds a PassMaker for the window derived from `sigma`, allocated from `alloc`.
// NOTE(review): the range check between the window computation and the Maker class is not
// visible in this view; cpu_blur() treats a null result as "sigma too large for GaussPass".
249 static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
250 SkASSERT(0 <= sigma);
251 int window = calculate_window(sigma);
// Local PassMaker that produces GaussPass objects for the captured window.
256 class Maker : public PassMaker {
258 explicit Maker(int window) : PassMaker{window} {}
259 Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
260 return GaussPass::Make(this->window(), buffer, alloc);
// Three circular buffers of (window - 1) vectors each, plus one extra vector when the window
// is even. This matches how GaussPass::Make() carves up the buffer.
263 size_t bufferSizeBytes() const override {
264 int window = this->window();
265 size_t onePassSize = window - 1;
266 // If the window is odd, then there is an obvious middle element. For even sizes
267 // 2 passes are shifted, and the last pass has an extra element. Like this:
273 size_t bufferCount = (window & 1) == 1 ? 3 * onePassSize : 3 * onePassSize + 1;
274 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
278 return alloc->make<Maker>(window);
// Splits `buffers` into three consecutive circular buffers and constructs the GaussPass in
// `alloc`. The layout spans 3 * (window - 1) vectors, plus one when the window is even, which
// is the size Maker::bufferSizeBytes() reports.
281 static GaussPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
282 // We don't need to store the trailing edge pixel in the buffer;
283 int passSize = window - 1;
284 skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
285 skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
286 skvx::Vec<4, uint32_t>* buffer2 = buffer1 + passSize;
287 // If the window is odd just one buffer is needed, but if it's even, then there is one
288 // more element on that pass.
289 skvx::Vec<4, uint32_t>* buffersEnd = buffer2 + ((window & 1) ? passSize : passSize + 1);
291 // Calculating the border is tricky. The border is the distance in pixels between the first
292 // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
293 // I will go through the odd case which is simpler, and then through the even case. Given a
294 // stack of filters seven wide for the odd case of three passes.
302 // The furthest changed pixel is when the filters are in the following configuration.
310 // The A pixel is calculated using the value S, the B uses A, and the C uses B, and
311 // finally D is C. So, with a window size of seven the border is nine. In the odd case, the
312 // border is 3*((window - 1)/2).
314 // For even cases the filter stack is more complicated. The spec specifies two passes
315 // of even filters and a final pass of odd filters. A stack for a width of six looks like
324 // The furthest pixel looks like this.
332 // For a window of six, the border value is eight. In the even case the border is 3 *
334 int border = (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1;
336 // If the window is odd then the divisor is just window ^ 3 otherwise,
337 // it is window * window * (window + 1) = window ^ 3 + window ^ 2;
338 int window2 = window * window;
339 int window3 = window2 * window;
340 int divisor = (window & 1) == 1 ? window3 : window3 + window2;
341 return alloc->make<GaussPass>(buffer0, buffer1, buffer2, buffersEnd, border, divisor);
// Stores the circular-buffer boundaries and builds the divider from `divisor`.
// NOTE(review): the remaining parameters and member initializers are not visible in this
// view; Make() passes (buffer0, buffer1, buffer2, buffersEnd, border, divisor).
344 GaussPass(skvx::Vec<4, uint32_t>* buffer0,
345 skvx::Vec<4, uint32_t>* buffer1,
346 skvx::Vec<4, uint32_t>* buffer2,
347 skvx::Vec<4, uint32_t>* buffersEnd,
354 , fBuffersEnd{buffersEnd}
355 , fDivider(divisor) {}
// Resets the per-scanline state: seeds the sums, zeroes all three circular buffers and
// rewinds the three cursors to the start of their buffers.
358 void startBlur() override {
359 skvx::Vec<4, uint32_t> zero = {0u, 0u, 0u, 0u};
// fSum2 starts at fDivider.half() in every lane rather than at zero.
// NOTE(review): presumably this makes the later divide round to nearest; confirm.
362 auto half = fDivider.half();
363 skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum2);
// The three buffers are contiguous, so one clear from fBuffer0 to fBuffersEnd covers them all.
364 sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
366 fBuffer0Cursor = fBuffer0;
367 fBuffer1Cursor = fBuffer1;
368 fBuffer2Cursor = fBuffer2;
371 // GaussPass implements the common three pass box filter approximation of Gaussian blur,
372 // but combines all three passes into a single pass. This approach is facilitated by three
373 // circular buffers the width of the window which track values for trailing edges of each of
374 // the three passes. This allows the algorithm to use more precision in the calculation
375 // because the values are not rounded each pass. And this implementation also avoids a trap
376 // that's easy to fall into resulting in blending in too many zeroes near the edge.
378 // In general, a window sum has the form:
379 // sum_n+1 = sum_n + leading_edge - trailing_edge.
380 // If instead we do the subtraction at the end of the previous iteration, we can just
381 // calculate the sums instead of having to do the subtractions too.
383 // In previous iteration:
384 // sum_n+1 = sum_n - trailing_edge.
386 // In this iteration:
387 // sum_n+1 = sum_n + leading_edge.
389 // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
390 // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
391 // three passes at the same time has the form:
393 // sum0_n+1 = sum0_n + leading edge
394 // sum1_n+1 = sum1_n + sum0_n+1
395 // sum2_n+1 = sum2_n + sum1_n+1
397 // sum2_n+1 / window^3 is the new value of the destination pixel.
399 // Reduce the sums by the trailing edges which were stored in the circular buffers for the
400 // next go around. This is the case for odd sized windows; for even windows the third
401 // circular buffer is one larger than the first two circular buffers.
403 // sum2_n+2 = sum2_n+1 - buffer2[i];
404 // buffer2[i] = sum1;
405 // sum1_n+2 = sum1_n+1 - buffer1[i];
406 // buffer1[i] = sum0;
407 // sum0_n+2 = sum0_n+1 - buffer0[i];
408 // buffer0[i] = leading edge
// Parameters: n is the number of pixels to advance; src and dst may each be null (see the
// four cases below); the strides are the element steps between consecutive pixels.
// The member state is copied into locals here and written back at the end.
410 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
411 skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
412 skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
413 skvx::Vec<4, uint32_t>* buffer2Cursor = fBuffer2Cursor;
414 skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
415 skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
416 skvx::Vec<4, uint32_t> sum2 = skvx::Vec<4, uint32_t>::Load(fSum2);
418 // Given an expanded input pixel, move the window ahead using the leadingEdge value.
// NOTE(review): the statements that add the leading edge into the sums are not visible in
// this view; the header comment above describes the intended order.
419 auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
424 skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum2);
// Subtract each trailing edge, store the new value in its slot, then advance the cursor and
// wrap it to the start of its buffer when it reaches the end.
426 sum2 -= *buffer2Cursor;
427 *buffer2Cursor = sum1;
428 buffer2Cursor = (buffer2Cursor + 1) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2;
429 sum1 -= *buffer1Cursor;
430 *buffer1Cursor = sum0;
431 buffer1Cursor = (buffer1Cursor + 1) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1;
432 sum0 -= *buffer0Cursor;
433 *buffer0Cursor = leadingEdge;
434 buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
436 return skvx::cast<uint8_t>(blurred);
// Loads four 8-bit values from one source pixel and widens each to a 32-bit lane.
439 auto loadEdge = [&](const uint32_t* srcCursor) {
440 return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
// Four cases follow, selected by which of src and dst are non-null. A null src feeds zero as
// the leading edge; a null dst discards the blurred value.
// NOTE(review): the loop headers and cursor increments around these calls are not visible.
445 (void)processValue(0);
447 } else if (src && !dst) {
449 (void)processValue(loadEdge(src));
452 } else if (!src && dst) {
454 processValue(0u).store(dst);
457 } else if (src && dst) {
459 processValue(loadEdge(src)).store(dst);
// Save the cursors so the next blurSegment() call continues from here.
466 fBuffer0Cursor = buffer0Cursor;
467 fBuffer1Cursor = buffer1Cursor;
468 fBuffer2Cursor = buffer2Cursor;
// Start of each circular buffer. The buffers are contiguous: buffer0 ends where buffer1
// begins, buffer1 ends where buffer2 begins, and buffer2 ends at fBuffersEnd.
475 skvx::Vec<4, uint32_t>* const fBuffer0;
476 skvx::Vec<4, uint32_t>* const fBuffer1;
477 skvx::Vec<4, uint32_t>* const fBuffer2;
478 skvx::Vec<4, uint32_t>* const fBuffersEnd;
// Divides the final sum by the divisor computed in Make().
479 const skvx::ScaledDividerU32 fDivider;
// Running sums, kept as raw bytes and accessed through Vec::Load() and store().
482 char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
483 char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
484 char fSum2[sizeof(skvx::Vec<4, uint32_t>)];
// Current position inside each circular buffer.
485 skvx::Vec<4, uint32_t>* fBuffer0Cursor;
486 skvx::Vec<4, uint32_t>* fBuffer1Cursor;
487 skvx::Vec<4, uint32_t>* fBuffer2Cursor;
490 // Implement a scanline processor that uses a two-box filter to approximate a Tent filter.
491 // The TentPass is limited to processing sigmas < 2183.
492 class TentPass final : public Pass {
494 // NB 2183 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
495 // using the Tent filter. It also limits the size of buffers used to hold intermediate values.
496 // Explanation of maximums:
497 // sum0 = window * 255
498 // sum1 = window * sum0 -> window * window * 255
500 // The value window^2 * 255 must fit in a uint32_t. So,
501 // window^2 < 2^32. window = 4104.
503 // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
504 // For window <= 4104, the largest value for sigma is 2183.
// Builds a PassMaker for the tent window derived from `sigma`, allocated from `alloc`.
505 static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
506 SkASSERT(0 <= sigma);
507 int gaussianWindow = calculate_window(sigma);
508 // This is a naive method of using the window size for the Gaussian blur to calculate the
509 // window size for the Tent blur. This seems to work well in practice.
511 // We can use a single pixel to generate the effective blur area given a window size. For
512 // the Gaussian blur this is 3 * window size. For the Tent filter this is 2 * window size.
513 int tentWindow = 3 * gaussianWindow / 2;
// NOTE(review): the body of this range check is not visible in this view; cpu_blur() treats
// a null result from MakeMaker() as "sigma out of range".
514 if (tentWindow >= 4104) {
// Local PassMaker that produces TentPass objects for the captured window.
518 class Maker : public PassMaker {
520 explicit Maker(int window) : PassMaker{window} {}
521 Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
522 return TentPass::Make(this->window(), buffer, alloc);
// Two circular buffers of (window - 1) vectors each, for odd and even windows alike.
525 size_t bufferSizeBytes() const override {
526 size_t onePassSize = this->window() - 1;
527 // If the window is odd, then there is an obvious middle element. For even sizes 2
528 // passes are shifted, and the last pass has an extra element. Like this:
533 size_t bufferCount = 2 * onePassSize;
534 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
538 return alloc->make<Maker>(tentWindow);
// Splits `buffers` into two consecutive circular buffers of (window - 1) vectors each and
// constructs the TentPass in `alloc`.
541 static TentPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
546 // We don't need to store the trailing edge pixel in the buffer;
547 int passSize = window - 1;
548 skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
549 skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
550 skvx::Vec<4, uint32_t>* buffersEnd = buffer1 + passSize;
552 // Calculating the border is tricky. The border is the distance in pixels between the first
553 // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
554 // I will go through the odd case which is simpler, and then through the even case. Given a
555 // stack of filters seven wide for the odd case of two passes.
562 // The furthest changed pixel is when the filters are in the following configuration.
569 // The A pixel is calculated using the value S, the B uses A, and the D uses B.
570 // So, with a window size of seven the border is six. In the odd case, the border is
573 // For even cases the filter stack is more complicated. It uses two passes
574 // of even filters offset from each other. A stack for a width of six looks like
582 // The furthest pixel looks like this.
589 // For a window of six, the border value is 5. In the even case the border is
591 int border = window - 1;
// Two stacked box sums of width `window`, so the divisor is window^2.
593 int divisor = window * window;
594 return alloc->make<TentPass>(buffer0, buffer1, buffersEnd, border, divisor);
// Stores the circular-buffer boundaries and builds the divider from `divisor`.
// NOTE(review): the remaining parameters and member initializers are not visible in this
// view; Make() passes (buffer0, buffer1, buffersEnd, border, divisor).
597 TentPass(skvx::Vec<4, uint32_t>* buffer0,
598 skvx::Vec<4, uint32_t>* buffer1,
599 skvx::Vec<4, uint32_t>* buffersEnd,
605 , fBuffersEnd{buffersEnd}
606 , fDivider(divisor) {}
// Resets the per-scanline state: fSum0 to zero, fSum1 to fDivider.half(), both circular
// buffers to zero, and both cursors to the start of their buffers.
609 void startBlur() override {
610 skvx::Vec<4, uint32_t>{0u, 0u, 0u, 0u}.store(fSum0);
// NOTE(review): seeding the last sum with half() presumably makes the divide round to
// nearest; confirm.
611 auto half = fDivider.half();
612 skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum1);
// The two buffers are contiguous, so one clear from fBuffer0 to fBuffersEnd covers both.
613 sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
615 fBuffer0Cursor = fBuffer0;
616 fBuffer1Cursor = fBuffer1;
619 // TentPass implements the common two pass box filter approximation of Tent filter,
620 // but combines both passes into a single pass. This approach is facilitated by two
621 // circular buffers the width of the window which track values for trailing edges of each of
622 // both passes. This allows the algorithm to use more precision in the calculation
623 // because the values are not rounded each pass. And this implementation also avoids a trap
624 // that's easy to fall into resulting in blending in too many zeroes near the edge.
626 // In general, a window sum has the form:
627 // sum_n+1 = sum_n + leading_edge - trailing_edge.
628 // If instead we do the subtraction at the end of the previous iteration, we can just
629 // calculate the sums instead of having to do the subtractions too.
631 // In previous iteration:
632 // sum_n+1 = sum_n - trailing_edge.
634 // In this iteration:
635 // sum_n+1 = sum_n + leading_edge.
637 // Now we can stack both sums and do them at once. Sum0 gets its leading edge from the
638 // actual data, and Sum1's leading edge is just Sum0. So, doing the
639 // two passes at the same time has the form:
641 // sum0_n+1 = sum0_n + leading edge
642 // sum1_n+1 = sum1_n + sum0_n+1
644 // sum1_n+1 / window^2 is the new value of the destination pixel.
646 // Reduce the sums by the trailing edges which were stored in the circular buffers for the
649 // sum1_n+2 = sum1_n+1 - buffer1[i];
650 // buffer1[i] = sum0;
651 // sum0_n+2 = sum0_n+1 - buffer0[i];
652 // buffer0[i] = leading edge
// Parameters: n is the number of pixels to advance; src and dst may each be null (see the
// four cases below); the strides are the element steps between consecutive pixels.
// The member state is copied into locals here and written back at the end.
654 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
655 skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
656 skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
657 skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
658 skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
660 // Given an expanded input pixel, move the window ahead using the leadingEdge value.
// NOTE(review): the statements that add the leading edge into the sums are not visible in
// this view; the header comment above describes the intended order.
661 auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
665 skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum1);
// Subtract each trailing edge, store the new value in its slot, then advance the cursor and
// wrap it to the start of its buffer when it reaches the end.
667 sum1 -= *buffer1Cursor;
668 *buffer1Cursor = sum0;
669 buffer1Cursor = (buffer1Cursor + 1) < fBuffersEnd ? buffer1Cursor + 1 : fBuffer1;
670 sum0 -= *buffer0Cursor;
671 *buffer0Cursor = leadingEdge;
672 buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
674 return skvx::cast<uint8_t>(blurred);
// Loads four 8-bit values from one source pixel and widens each to a 32-bit lane.
677 auto loadEdge = [&](const uint32_t* srcCursor) {
678 return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
// Four cases follow, selected by which of src and dst are non-null. A null src feeds zero as
// the leading edge; a null dst discards the blurred value.
// NOTE(review): the loop headers and cursor increments around these calls are not visible.
683 (void)processValue(0);
685 } else if (src && !dst) {
687 (void)processValue(loadEdge(src));
690 } else if (!src && dst) {
692 processValue(0u).store(dst);
695 } else if (src && dst) {
697 processValue(loadEdge(src)).store(dst);
// Save the cursors so the next blurSegment() call continues from here.
704 fBuffer0Cursor = buffer0Cursor;
705 fBuffer1Cursor = buffer1Cursor;
// Start of each circular buffer. The buffers are contiguous: buffer0 ends where buffer1
// begins, and buffer1 ends at fBuffersEnd.
710 skvx::Vec<4, uint32_t>* const fBuffer0;
711 skvx::Vec<4, uint32_t>* const fBuffer1;
712 skvx::Vec<4, uint32_t>* const fBuffersEnd;
// Divides the final sum by the divisor computed in Make().
713 const skvx::ScaledDividerU32 fDivider;
// Running sums, kept as raw bytes and accessed through Vec::Load() and store().
716 char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
717 char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
// Current position inside each circular buffer.
718 skvx::Vec<4, uint32_t>* fBuffer0Cursor;
719 skvx::Vec<4, uint32_t>* fBuffer1Cursor;
// Used by cpu_blur() when neither axis needs blurring. Copies the srcBounds part of `input`
// into a new dstBounds-sized raster image and zero-fills the rows above and below the source.
// NOTE(review): the bodies of the early-exit checks and of the left/right fill loops are not
// visible in this view.
722 sk_sp<SkSpecialImage> copy_image_with_bounds(
723 const SkImageFilter_Base::Context& ctx, const sk_sp<SkSpecialImage> &input,
724 SkIRect srcBounds, SkIRect dstBounds) {
726 if (!input->getROPixels(&inputBM)) {
// The copy below works in 32-bit pixels (getAddr32, sizeof(uint32_t)).
730 if (inputBM.colorType() != kN32_SkColorType) {
735 inputBM.extractSubset(&src, srcBounds);
737 // Make everything relative to the destination bounds.
738 srcBounds.offset(-dstBounds.x(), -dstBounds.y());
739 dstBounds.offset(-dstBounds.x(), -dstBounds.y());
741 auto srcW = srcBounds.width(),
742 dstW = dstBounds.width(),
743 dstH = dstBounds.height();
745 SkImageInfo dstInfo = SkImageInfo::Make(dstW, dstH, inputBM.colorType(), inputBM.alphaType());
748 if (!dst.tryAllocPixels(dstInfo)) {
752 // There is no blurring to do, but we still need to copy the source while accounting for the
753 // dstBounds. Remember that the src was intersected with the dst.
755 size_t dstWBytes = dstW * sizeof(uint32_t);
// Rows above the source: zero the whole row.
756 for (;y < srcBounds.top(); y++) {
757 sk_bzero(dst.getAddr32(0, y), dstWBytes);
// Rows that overlap the source: walk to the source's left edge, copy srcW source pixels,
// then walk on to the right edge of the destination.
760 for (;y < srcBounds.bottom(); y++) {
762 uint32_t* dstPtr = dst.getAddr32(0, y);
763 for (;x < srcBounds.left(); x++) {
767 memcpy(dstPtr, src.getAddr32(x - srcBounds.left(), y - srcBounds.top()),
768 srcW * sizeof(uint32_t));
773 for (;x < dstBounds.right(); x++) {
// Rows below the source: zero the whole row.
778 for (;y < dstBounds.bottom(); y++) {
779 sk_bzero(dst.getAddr32(0, y), dstWBytes);
782 return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
784 dst, ctx.surfaceProps());
787 // TODO: Implement CPU backend for different fTileMode.
// Raster blur. Chooses a pass maker for each axis, then runs a horizontal pass and/or a
// vertical pass into a new dstBounds-sized bitmap. `sigma` is expected to have already been
// through map_sigma().
788 sk_sp<SkSpecialImage> cpu_blur(
789 const SkImageFilter_Base::Context& ctx,
790 SkVector sigma, const sk_sp<SkSpecialImage> &input,
791 SkIRect srcBounds, SkIRect dstBounds) {
792 // map_sigma limits sigma to 532 to match 1000px box filter limit of WebKit and Firefox.
793 // Since this does not exceed the limits of the TentPass (2183), there won't be overflow when
794 // computing a kernel over a pixel window filled with 255.
795 static_assert(kMaxSigma <= 2183.0f);
// Arena that owns the makers, the passes and the scratch buffer for this call.
797 SkSTArenaAlloc<1024> alloc;
// Try GaussPass first, then TentPass; abort if neither accepts the sigma.
// NOTE(review): the return statements inside the two ifs are not visible in this view.
798 auto makeMaker = [&](double sigma) -> PassMaker* {
799 SkASSERT(0 <= sigma && sigma <= 2183); // should be guaranteed after map_sigma
800 if (PassMaker* maker = GaussPass::MakeMaker(sigma, &alloc)) {
803 if (PassMaker* maker = TentPass::MakeMaker(sigma, &alloc)) {
806 SK_ABORT("Sigma is out of range.");
809 PassMaker* makerX = makeMaker(sigma.x());
810 PassMaker* makerY = makeMaker(sigma.y());
// A window of 1 or less on both axes means no blurring is needed, so just copy the pixels.
812 if (makerX->window() <= 1 && makerY->window() <= 1) {
813 return copy_image_with_bounds(ctx, input, srcBounds, dstBounds);
818 if (!input->getROPixels(&inputBM)) {
// The passes below read and write 32-bit pixels.
822 if (inputBM.colorType() != kN32_SkColorType) {
827 inputBM.extractSubset(&src, srcBounds);
829 // Make everything relative to the destination bounds.
830 srcBounds.offset(-dstBounds.x(), -dstBounds.y());
831 dstBounds.offset(-dstBounds.x(), -dstBounds.y());
833 auto srcW = srcBounds.width(),
834 srcH = srcBounds.height(),
835 dstW = dstBounds.width(),
836 dstH = dstBounds.height();
838 SkImageInfo dstInfo = inputBM.info().makeWH(dstW, dstH);
841 if (!dst.tryAllocPixels(dstInfo)) {
// One scratch buffer, sized for the larger of the two passes, is shared by both.
845 size_t bufferSizeBytes = std::max(makerX->bufferSizeBytes(), makerY->bufferSizeBytes());
846 auto buffer = alloc.makeBytesAlignedTo(bufferSizeBytes, alignof(skvx::Vec<4, uint32_t>));
848 // Basic Plan: The three cases to handle
849 // * Horizontal and Vertical - blur horizontally while copying values from the source to
850 // the destination. Then, do an in-place vertical blur.
851 // * Horizontal only - blur horizontally copying values from the source to the destination.
852 // * Vertical only - blur vertically copying values from the source to the destination.
854 // Default to vertical only blur case. If a horizontal blur is needed, then these values
855 // will be adjusted while doing the horizontal blur.
856 auto intermediateSrc = static_cast<uint32_t *>(src.getPixels());
857 auto intermediateRowBytesAsPixels = src.rowBytesAsPixels();
858 auto intermediateWidth = srcW;
860 // Because the border is calculated before the fork of the GPU/CPU path. The border is
861 // the maximum of the two rendering methods. In the case where sigma is zero, then the
862 // src and dst left values are the same. If sigma is small resulting in a window size of
863 // 1, then border calculations add some pixels which will always be zero. Inset the
864 // destination by those zero pixels. This case is very rare.
865 auto intermediateDst = dst.getAddr32(srcBounds.left(), 0);
867 // The following code is executed very rarely, I have never seen it in a real web
868 // page. If sigma is small but not zero then shared GPU/CPU border calculation
869 // code adds extra pixels for the border. Just clear everything to clear those pixels.
870 // This solution is overkill, but very simple.
871 if (makerX->window() == 1 || makerY->window() == 1) {
// Horizontal pass: each source row is blurred into dst with a stride of 1 on both sides.
875 if (makerX->window() > 1) {
876 Pass* pass = makerX->makePass(buffer, &alloc);
877 // Make int64 to avoid overflow in multiplication below.
878 int64_t shift = srcBounds.top() - dstBounds.top();
880 // For the horizontal blur, starts part way down in anticipation of the vertical blur.
881 // For a vertical sigma of zero shift should be zero. But, for small sigma,
882 // shift may be > 0 but the vertical window could be 1.
883 intermediateSrc = static_cast<uint32_t *>(dst.getPixels())
884 + (shift > 0 ? shift * dst.rowBytesAsPixels() : 0);
885 intermediateRowBytesAsPixels = dst.rowBytesAsPixels();
886 intermediateWidth = dstW;
887 intermediateDst = static_cast<uint32_t *>(dst.getPixels());
889 const uint32_t* srcCursor = static_cast<uint32_t*>(src.getPixels());
890 uint32_t* dstCursor = intermediateSrc;
891 for (auto y = 0; y < srcH; y++) {
892 pass->blur(srcBounds.left(), srcBounds.right(), dstBounds.right(),
893 srcCursor, 1, dstCursor, 1);
894 srcCursor += src.rowBytesAsPixels();
895 dstCursor += intermediateRowBytesAsPixels;
// Vertical pass: each column is blurred using the row strides as the pixel step. It reads
// from whatever the horizontal step left in intermediateSrc.
// NOTE(review): the per-column cursor increments are not visible in this view.
899 if (makerY->window() > 1) {
900 Pass* pass = makerY->makePass(buffer, &alloc);
901 const uint32_t* srcCursor = intermediateSrc;
902 uint32_t* dstCursor = intermediateDst;
903 for (auto x = 0; x < intermediateWidth; x++) {
904 pass->blur(srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(),
905 srcCursor, intermediateRowBytesAsPixels,
906 dstCursor, dst.rowBytesAsPixels());
912 return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
914 dst, ctx.surfaceProps());
// Filters the input: resolves the input image and bounds, maps sigma through the CTM, then
// dispatches to the GPU or the CPU blur. On success `*offset` receives the top-left corner
// of the destination bounds.
// NOTE(review): several early-exit bodies and the preprocessor guards around the GPU branch
// are not visible in this view.
918 sk_sp<SkSpecialImage> SkBlurImageFilter::onFilterImage(const Context& ctx,
919 SkIPoint* offset) const {
920 SkIPoint inputOffset = SkIPoint::Make(0, 0);
922 sk_sp<SkSpecialImage> input(this->filterInput(0, ctx, &inputOffset));
927 SkIRect inputBounds = SkIRect::MakeXYWH(inputOffset.fX, inputOffset.fY,
928 input->width(), input->height());
930 // Calculate the destination bounds.
932 if (!this->applyCropRect(this->mapContext(ctx), inputBounds, &dstBounds)) {
// Narrow inputBounds to the part that overlaps dstBounds.
935 if (!inputBounds.intersect(dstBounds)) {
939 // Save the offset in preparation to make all rectangles relative to the inputOffset.
940 SkIPoint resultOffset = SkIPoint::Make(dstBounds.fLeft, dstBounds.fTop);
942 // Make all bounds relative to the inputOffset.
943 inputBounds.offset(-inputOffset);
944 dstBounds.offset(-inputOffset);
// map_sigma() returns the absolute value of each component, clamped to kMaxSigma.
946 SkVector sigma = map_sigma(fSigma, ctx.ctm());
947 SkASSERT(SkScalarIsFinite(sigma.x()) && sigma.x() >= 0.f && sigma.x() <= kMaxSigma &&
948 SkScalarIsFinite(sigma.y()) && sigma.y() >= 0.f && sigma.y() <= kMaxSigma);
950 sk_sp<SkSpecialImage> result;
952 if (ctx.gpuBacked()) {
953 // Ensure the input is in the destination's gamut. This saves us from having to do the
954 // xform during the filter itself.
955 input = ImageToColorSpace(input.get(), ctx.colorType(), ctx.colorSpace(),
957 result = this->gpuFilter(ctx, sigma, input, inputBounds, dstBounds, inputOffset,
962 result = cpu_blur(ctx, sigma, input, inputBounds, dstBounds);
965 // Return the resultOffset if the blur succeeded.
966 if (result != nullptr) {
967 *offset = resultOffset;
// GPU blur path. When both sigmas are effectively zero it returns the inputBounds subset of
// the input and writes the offset itself. Otherwise it runs SkGpuBlurUtils::GaussianBlur and
// wraps the resulting surface view in a special image of dstBounds' size.
// NOTE(review): most arguments of the GaussianBlur call and the null checks on its result
// are not visible in this view.
973 sk_sp<SkSpecialImage> SkBlurImageFilter::gpuFilter(
974 const Context& ctx, SkVector sigma, const sk_sp<SkSpecialImage> &input, SkIRect inputBounds,
975 SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const {
977 if (SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.x()) &&
978 SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.y())) {
// inputBounds is relative to inputOffset here, so add it back for the reported offset.
979 offset->fX = inputBounds.x() + inputOffset.fX;
980 offset->fY = inputBounds.y() + inputOffset.fY;
981 return input->makeSubset(inputBounds);
984 auto context = ctx.getContext();
986 GrSurfaceProxyView inputView = input->view(context);
987 if (!inputView.proxy()) {
990 SkASSERT(inputView.asTextureProxy());
992 // TODO (michaelludwig) - The color space choice is odd, should it just be ctx.refColorSpace()?
// Shift both rectangles by the top-left of the input's subset.
993 dstBounds.offset(input->subset().topLeft());
994 inputBounds.offset(input->subset().topLeft());
995 auto sdc = SkGpuBlurUtils::GaussianBlur(
997 std::move(inputView),
998 SkColorTypeToGrColorType(input->colorType()),
1000 ctx.colorSpace() ? sk_ref_sp(input->getColorSpace()) : nullptr,
1010 return SkSpecialImage::MakeDeferredFromGpu(context,
1011 SkIRect::MakeSize(dstBounds.size()),
1012 kNeedNewImageUniqueID_SpecialImage,
1013 sdc->readSurfaceView(),
1014 sdc->colorInfo().colorType(),
1015 sk_ref_sp(input->getColorSpace()),
1016 ctx.surfaceProps());
// Starts from the input filter's fast bounds (or `src` when there is no input) and grows
// them by three times the local sigma on each axis.
1023 SkRect SkBlurImageFilter::computeFastBounds(const SkRect& src) const {
1024 SkRect bounds = this->getInput(0) ? this->getInput(0)->computeFastBounds(src) : src;
1025 bounds.outset(fSigma.width() * 3, fSigma.height() * 3);
// Returns `src` grown by ceil(3 * sigma) on each axis, where sigma is the local sigma mapped
// through `ctm` by map_sigma(). The MapDirection and inputRect arguments are not used in the
// visible code.
1029 SkIRect SkBlurImageFilter::onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
1030 MapDirection, const SkIRect* inputRect) const {
1031 SkVector sigma = map_sigma(fSigma, ctm);
1032 return src.makeOutset(SkScalarCeilToInt(sigma.x() * 3), SkScalarCeilToInt(sigma.y() * 3));