#include "SkImage_Base.h"
#include "SkNx.h"
#include "SkOpts.h"
+#include "SkPM4fPriv.h"
#include "SkString.h"
#define INNER_LOOPS 10
-namespace sk_default {
-extern void brute_force_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
+// Reference blender: every pixel goes through srcover_blend_srgb8888_srgb_1, with no shortcut
+// for source pixels whose top byte is 0xFF or 0x00. The nsrc source pixels are reused from the
+// start until ndst destination pixels have been written.
+static void brute_force_srcover_srgb_srgb(
+    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
+    // With nsrc <= 0 the chunk size below is never positive, so ndst would never shrink and
+    // the loop would not terminate.
+    if (nsrc <= 0) {
+        return;
+    }
+    while (ndst > 0) {
+        int n = SkTMin(ndst, nsrc);
+
+        for (int i = 0; i < n; i++) {
+            // The blend helper expects a linear source, so convert each pixel first.
+            srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i])));
+        }
+        ndst -= n;
+    }
+}
+
+// Non-SIMD blender that examines source pixels two at a time through 64-bit loads. Runs of
+// pairs whose top bytes are both 0xFF are copied, runs whose top bytes are both 0x00 are
+// skipped, and any other pair is blended pixel by pixel. The nsrc source pixels are reused
+// from the start until ndst destination pixels have been processed.
+// NOTE(review): dst and src are reinterpreted as uint64_t*, so this presumably relies on the
+// target tolerating unaligned/aliased 64-bit access -- confirm.
+static void best_non_simd_srcover_srgb_srgb(
+    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
+    // With nsrc <= 0 no chunk ever makes progress and the outer loop would not terminate.
+    if (nsrc <= 0) {
+        return;
+    }
+
+    uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
+
+    // Blend two consecutive pixels individually.
+    auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) {
+        srcover_srgb8888_srgb_1(dst++, *src++);
+        srcover_srgb8888_srgb_1(dst, *src);
+    };
+
+    while (ndst > 0) {
+        int count = SkTMin(ndst, nsrc);
+        ndst -= count;
+        const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src);
+        const uint64_t* end = dsrc + (count >> 1);
+        // Test before the first load: a chunk with fewer than two pixels has no complete
+        // pair, and a do/while here would read (and possibly write) 64 bits past its end.
+        while (dsrc < end) {
+            if ((~*dsrc & 0xFF000000FF000000) == 0) {
+                // Both top bytes are 0xFF: copy pairs while that keeps holding.
+                do {
+                    *ddst++ = *dsrc++;
+                } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0);
+            } else if ((*dsrc & 0xFF000000FF000000) == 0) {
+                // Both top bytes are 0x00: leave the destination alone and skip ahead.
+                do {
+                    dsrc++;
+                    ddst++;
+                } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0);
+            } else {
+                srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++),
+                                    reinterpret_cast<const uint32_t*>(dsrc++));
+            }
+        }
+
+        if ((count & 1) != 0) {
+            uint32_t* tail = reinterpret_cast<uint32_t*>(ddst);
+            srcover_srgb8888_srgb_1(tail, *reinterpret_cast<const uint32_t*>(dsrc));
+            // Step past the odd pixel so that, when the source repeats (ndst > nsrc), the
+            // next pass starts on the following destination pixel instead of this one.
+            ddst = reinterpret_cast<uint64_t*>(tail + 1);
+        }
+    }
+}
+
+// Blends one pixel at a time through srcover_srgb8888_srgb_1. The nsrc source pixels are
+// reused from the start until ndst destination pixels have been written.
+static void trivial_srcover_srgb_srgb(
+    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
+    // With nsrc <= 0 the chunk size below is never positive, so ndst would never shrink and
+    // the loop would not terminate.
+    if (nsrc <= 0) {
+        return;
+    }
+    while (ndst > 0) {
+        int n = SkTMin(ndst, nsrc);
+
+        for (int i = 0; i < n; i++) {
+            srcover_srgb8888_srgb_1(dst++, src[i]);
+        }
+        ndst -= n;
+    }
}
class SrcOverVSkOptsBruteForce {
public:
static SkString Name() { return SkString{"VSkOptsBruteForce"}; }
static bool WorksOnCpu() { return true; }
- static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
- sk_default::brute_force_srcover_srgb_srgb(dst, src, count, count);
+ static void BlendN(uint32_t* dst, const uint32_t* src, int count) {  // argument order: dst, src, count
+ brute_force_srcover_srgb_srgb(dst, src, count, count);  // file-local helper; count serves as both ndst and nsrc
}
};
-namespace sk_default {
-extern void trivial_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
-}
-
class SrcOverVSkOptsTrivial {
public:
static SkString Name() { return SkString{"VSkOptsTrivial"}; }
static bool WorksOnCpu() { return true; }
- static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
- sk_default::trivial_srcover_srgb_srgb(dst, src, count, count);
+ static void BlendN(uint32_t* dst, const uint32_t* src, int count) {  // argument order: dst, src, count
+ trivial_srcover_srgb_srgb(dst, src, count, count);  // file-local helper; count serves as both ndst and nsrc
}
};
-namespace sk_default {
-extern void best_non_simd_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
-}
-
class SrcOverVSkOptsNonSimdCore {
public:
static SkString Name() { return SkString{"VSkOptsNonSimdCore"}; }
static bool WorksOnCpu() { return true; }
- static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
- sk_default::best_non_simd_srcover_srgb_srgb(dst, src, count, count);
+ static void BlendN(uint32_t* dst, const uint32_t* src, int count) {  // argument order: dst, src, count
+ best_non_simd_srcover_srgb_srgb(dst, src, count, count);  // file-local helper; count serves as both ndst and nsrc
}
};
public:
static SkString Name() { return SkString{"VSkOptsDefault"}; }
static bool WorksOnCpu() { return true; }
- static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
+ static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
sk_default::srcover_srgb_srgb(dst, src, count, count);
}
};
public:
static SkString Name() { return SkString{"VSkOptsSSE41"}; }
static bool WorksOnCpu() { return SkCpu::Supports(SkCpu::SSE41); }
- static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
+ static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
sk_sse41::srcover_srgb_srgb(dst, src, count, count);
}
};
class LinearSrcOverBench : public Benchmark {
public:
LinearSrcOverBench(const char* fileName) : fFileName(fileName) {
- fName = "LinearSrcOver";
+ fName = "LinearSrcOver_";  // the trailing '_' separates this prefix from the file name appended next
fName.append(fileName);
fName.append(Blender::Name());
}
bm.peekPixels(&fPixmap);
fCount = fPixmap.rowBytesAsPixels();
fDst.reset(fCount);
- memset(fDst.get(), 0, fPixmap.rowBytes());
+ sk_bzero(fDst.get(), fPixmap.rowBytes());
}
}
for (int i = 0; i < loops * INNER_LOOPS; ++i) {
const uint32_t* src = fPixmap.addr32();
for (int y = 0; y < fPixmap.height(); y++) {
- Blender::BlendN(fDst.get(), width, src);
+ Blender::BlendN(fDst.get(), src, width);
src += width;
}
}
return to_4b(exact_linear_to_srgb(x4) * Sk4f(255) + Sk4f(0.5f));
}
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the
+// observation that the 255's cancel.
+// invA = 1 - (As / 255);
+//
+// R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA)
+// => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2)
+// => R = sqrt(Rs^2 + Rd^2 * invA)
+// Note: src is assumed to be linear.
+static inline void srcover_blend_srgb8888_srgb_1(uint32_t* dst, const Sk4f& src) {
+    // Bring the destination pixel into linear space; the caller supplies src already linear.
+    Sk4f dstLinear = srgb_to_linear(to_4f(*dst));
+    // Share of the destination that survives: 1 - As / 255.
+    Sk4f srcAlpha{src[SkPM4f::A]};
+    Sk4f dstScale = 1.0f - srcAlpha * (1.0f / 255.0f);
+    // Blend, convert back to sRGB, and add 0.5 before packing (presumably so that to_4b
+    // rounds instead of truncating).
+    *dst = to_4b(linear_to_srgb(src + dstLinear * dstScale) + 0.5f);
+}
+
+// SrcOver of a single packed pixel onto *dst, with shortcuts for the two extreme values of
+// the source's top byte so that only the remaining pixels pay for the full blend.
+static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) {
+    const uint32_t alphaBits = pixel & 0xFF000000;
+    if (alphaBits == 0xFF000000) {
+        // Top byte is 0xFF: the source replaces the destination outright.
+        *dst = pixel;
+        return;
+    }
+    if (alphaBits == 0) {
+        // Top byte is 0x00: the destination is left untouched.
+        return;
+    }
+    srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel)));
+}
+
#endif
namespace SK_OPTS_NS {
-// An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the
-// observation that the 255's cancel.
-// invA = 1 - (As / 255);
-//
-// R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA)
-// => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2)
-// => R = sqrt(Rs^2 + Rd^2 * invA)
-static inline void blend_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) {
- Sk4f s = srgb_to_linear(to_4f(pixel));
- Sk4f d = srgb_to_linear(to_4f(*dst));
- Sk4f invAlpha = 1.0f - Sk4f{s[SkPM4f::A]} * (1.0f / 255.0f);
- Sk4f r = linear_to_srgb(s + d * invAlpha) + 0.5f;
- *dst = to_4b(r);
-}
-
-static inline void srcover_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) {
+static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) {  // NOTE(review): same name and body as the helper this patch adds after srcover_blend_srgb8888_srgb_1 -- confirm both copies are intended
if ((~pixel & 0xFF000000) == 0) {
*dst = pixel;
} else if ((pixel & 0xFF000000) != 0) {
- blend_srgb_srgb_1(dst, pixel);
+ srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel)));  // the blend helper takes a linear source, so the conversion now happens at the call site
}
}
-static inline void srcover_srgb_srgb_2(uint32_t* dst, const uint32_t* src) {
- srcover_srgb_srgb_1(dst++, *src++);
- srcover_srgb_srgb_1(dst, *src);
-}
-
static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
- srcover_srgb_srgb_1(dst++, *src++);
- srcover_srgb_srgb_1(dst++, *src++);
- srcover_srgb_srgb_1(dst++, *src++);
- srcover_srgb_srgb_1(dst, *src);
-}
-
-void best_non_simd_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
- uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
-
- while (ndst >0) {
- int count = SkTMin(ndst, nsrc);
- ndst -= count;
- const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src);
- const uint64_t* end = dsrc + (count >> 1);
- do {
- if ((~*dsrc & 0xFF000000FF000000) == 0) {
- do {
- *ddst++ = *dsrc++;
- } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0);
- } else if ((*dsrc & 0xFF000000FF000000) == 0) {
- do {
- dsrc++;
- ddst++;
- } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0);
- } else {
- srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++),
- reinterpret_cast<const uint32_t*>(dsrc++));
- }
- } while (dsrc < end);
-
- if ((count & 1) != 0) {
- srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst),
- *reinterpret_cast<const uint32_t*>(dsrc));
- }
- }
-}
-
-void brute_force_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
- while (ndst > 0) {
- int n = SkTMin(ndst, nsrc);
-
- for (int i = 0; i < n; i++) {
- blend_srgb_srgb_1(dst++, src[i]);
- }
- ndst -= n;
- }
-}
-
-void trivial_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
- while (ndst > 0) {
- int n = SkTMin(ndst, nsrc);
-
- for (int i = 0; i < n; i++) {
- srcover_srgb_srgb_1(dst++, src[i]);
- }
- ndst -= n;
- }
+ srcover_srgb8888_srgb_1(dst++, *src++);  // first of four consecutive pixels, each blended individually
+ srcover_srgb8888_srgb_1(dst++, *src++);
+ srcover_srgb8888_srgb_1(dst++, *src++);
+ srcover_srgb8888_srgb_1(dst, *src);  // last pixel: dst and src are by-value parameters, so no final increment is needed
}
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
count = count & 3;
while (count-- > 0) {
- srcover_srgb_srgb_1(dst++, *src++);
+ srcover_srgb8888_srgb_1(dst++, *src++);
}
}
}
count = count & 3;
while (count-- > 0) {
- srcover_srgb_srgb_1(dst++, *src++);
+ srcover_srgb8888_srgb_1(dst++, *src++);
}
}
}
void srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
- trivial_srcover_srgb_srgb(dst, src, ndst, nsrc);
+    // Blends one pixel at a time through srcover_srgb8888_srgb_1, reusing the nsrc source
+    // pixels from the start until ndst destination pixels have been written.
+    // With nsrc <= 0 the chunk size below is never positive, so ndst would never shrink and
+    // the loop would not terminate.
+    if (nsrc <= 0) {
+        return;
+    }
+    while (ndst > 0) {
+        int n = SkTMin(ndst, nsrc);
+
+        for (int i = 0; i < n; i++) {
+            srcover_srgb8888_srgb_1(dst++, src[i]);
+        }
+        ndst -= n;
+    }
}
#endif
#include "SkImage.h"
#include "SkImage_Base.h"
#include "SkOpts.h"
+#include "SkPM4fPriv.h"
#include "SkNx.h"
#include "Test.h"
-#include "../include/core/SkImageInfo.h"
typedef void (*Blender)(uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
-namespace sk_default {
-extern void brute_force_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
+// Reference blender for the test: every pixel goes through srcover_blend_srgb8888_srgb_1,
+// with no shortcut for source pixels whose top byte is 0xFF or 0x00. The nsrc source pixels
+// are reused from the start until ndst destination pixels have been written.
+static void brute_force_srcover_srgb_srgb(
+    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
+    // With nsrc <= 0 the chunk size below is never positive, so ndst would never shrink and
+    // the loop would not terminate.
+    if (nsrc <= 0) {
+        return;
+    }
+    while (ndst > 0) {
+        int n = SkTMin(ndst, nsrc);
+
+        for (int i = 0; i < n; i++) {
+            // The blend helper expects a linear source, so convert each pixel first.
+            srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i])));
+        }
+        ndst -= n;
+    }
}
namespace sk_default {
-extern void trivial_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
-
-extern void best_non_simd_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
-
extern void srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
}
SkAutoTArray<uint32_t> testDst(width);
for (int y = 0; y < pixmap.height(); y++) {
- memset(correctDst.get(), 0, width * sizeof(uint32_t));
- memset(testDst.get(), 0, width * sizeof(uint32_t));
- sk_default::brute_force_srcover_srgb_srgb(correctDst.get(), src, width, width);
+ sk_bzero(correctDst.get(), width * sizeof(uint32_t));
+ sk_bzero(testDst.get(), width * sizeof(uint32_t));
+ brute_force_srcover_srgb_srgb(correctDst.get(), src, width, width);
blender(testDst.get(), src, width, width);
for (int x = 0; x < width; x++) {
REPORTER_ASSERT_MESSAGE(
DEF_TEST(SkBlend_optsCheck, reporter) {
std::vector<Spec> specs = {
- Spec{sk_default::trivial_srcover_srgb_srgb, "trivial"},
- Spec{sk_default::best_non_simd_srcover_srgb_srgb, "best_non_simd"},
Spec{sk_default::srcover_srgb_srgb, "default"},
};
#if defined(SK_CPU_X86) && !defined(SK_BUILD_NO_OPTS)
}
}
-
-
DEF_TEST(SkBlend_optsSqrtCheck, reporter) {
for (int c = 0; c < 256; c++) {
Sk4f i{(float)c};