// - store src back as srgb
// Every stage except for srcover interacts with memory, and so will need _tail variants.
-static void SK_VECTORCALL load_s_srgb(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_s_srgb) {
+ auto ptr = (const uint32_t*)ctx + x;
r = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff],
sk_linear_from_srgb[(ptr[1] >> 0) & 0xff],
sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
-static void SK_VECTORCALL load_s_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_s_srgb_tail) {
+ auto ptr = (const uint32_t*)ctx + x;
r = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 };
g = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 };
b = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 };
a = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 };
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
-static void SK_VECTORCALL load_d_srgb(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_d_srgb) {
+ auto ptr = (const uint32_t*)ctx + x;
dr = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff],
sk_linear_from_srgb[(ptr[1] >> 0) & 0xff],
sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
da = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
-static void SK_VECTORCALL load_d_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_d_srgb_tail) {
+ auto ptr = (const uint32_t*)ctx + x;
dr = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 };
dg = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 };
db = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 };
da = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 };
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
-static void SK_VECTORCALL scale_u8(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint8_t*>() + x;
+SK_RASTER_STAGE(scale_u8) {
+ auto ptr = (const uint8_t*)ctx + x;
auto c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f);
r *= c;
g *= c;
b *= c;
a *= c;
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
-static void SK_VECTORCALL scale_u8_tail(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint8_t*>() + x;
+SK_RASTER_STAGE(scale_u8_tail) {
+ auto ptr = (const uint8_t*)ctx + x;
auto c = *ptr * (1/255.0f);
r *= c;
g *= c;
b *= c;
a *= c;
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
-static void SK_VECTORCALL srcover(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
+SK_RASTER_STAGE(srcover) {
auto A = 1.0f - a;
r += dr * A;
g += dg * A;
b += db * A;
a += da * A;
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
-static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<uint32_t*>() + x;
+SK_RASTER_STAGE(store_srgb) {
+ auto ptr = (uint32_t*)ctx + x;
( sk_linear_to_srgb(r)
| sk_linear_to_srgb(g) << 8
| Sk4f_round(255.0f*a) << 24).store(ptr);
}
-static void SK_VECTORCALL store_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<uint32_t*>() + x;
+SK_RASTER_STAGE(store_srgb_tail) {
+ auto ptr = (uint32_t*)ctx + x;
Sk4i rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0});
rgba = {rgba[0], rgba[1], rgba[2], (int)(255.0f * a[0] + 0.5f)};
class SkRasterPipelineBench : public Benchmark {
public:
- SkRasterPipelineBench() {}
+ SkRasterPipelineBench(bool fused) : fFused(fused) {}
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
- const char* onGetName() override { return "SkRasterPipelineBench"; }
+ const char* onGetName() override { return fFused ? "SkRasterPipelineBench_fused"
+ : "SkRasterPipelineBench_pipeline"; }
void onDraw(int loops, SkCanvas*) override {
- SkRasterPipeline p;
- p.append(load_s_srgb, load_s_srgb_tail, src);
- p.append( scale_u8, scale_u8_tail, mask);
- p.append(load_d_srgb, load_d_srgb_tail, dst);
- p.append(srcover);
- p.append( store_srgb, store_srgb_tail, dst);
-
while (loops --> 0) {
- p.run(N);
+ fFused ? this->runFused() : this->runPipeline();
}
}
+
+ // Runs the same five stages as runPipeline(), but calls them directly back-to-back
+ // instead of chaining them through an SkRasterPipeline. Whole groups of 4 pixels go
+ // through the regular stages; any remainder goes 1 pixel at a time through the _tail
+ // variants (srcover touches no memory, so it has no _tail variant and gets no context).
+ void runFused() {
+ Sk4f r,g,b,a, dr,dg,db,da;
+ size_t x = 0, n = N;
+ while (n >= 4) {
+ // Every stage must see the same eight distinct registers. Passing da in the db
+ // slot would make srcover blend with dst alpha where it needs dst blue.
+ load_s_srgb(src , x, r,g,b,a, dr,dg,db,da);
+ scale_u8 (mask , x, r,g,b,a, dr,dg,db,da);
+ load_d_srgb(dst , x, r,g,b,a, dr,dg,db,da);
+ srcover (nullptr, x, r,g,b,a, dr,dg,db,da);
+ store_srgb (dst , x, r,g,b,a, dr,dg,db,da);
+
+ x += 4;
+ n -= 4;
+ }
+ while (n > 0) {
+ // Same stage order as above, one pixel per iteration.
+ load_s_srgb_tail(src , x, r,g,b,a, dr,dg,db,da);
+ scale_u8_tail (mask , x, r,g,b,a, dr,dg,db,da);
+ load_d_srgb_tail(dst , x, r,g,b,a, dr,dg,db,da);
+ srcover (nullptr, x, r,g,b,a, dr,dg,db,da);
+ store_srgb_tail (dst , x, r,g,b,a, dr,dg,db,da);
+
+ x += 1;
+ n -= 1;
+ }
+ }
+
+ void runPipeline() {
+ SkRasterPipeline p;
+ p.append<load_s_srgb, load_s_srgb_tail>( src);
+ p.append< scale_u8, scale_u8_tail>(mask);
+ p.append<load_d_srgb, load_d_srgb_tail>( dst);
+ p.append<srcover>();
+ p.append< store_srgb, store_srgb_tail>( dst);
+
+ p.run(N);
+ }
+
+ bool fFused;
};
-DEF_BENCH( return new SkRasterPipelineBench; )
+DEF_BENCH( return new SkRasterPipelineBench(true); )
+DEF_BENCH( return new SkRasterPipelineBench(false); )
}
// Clamp colors into [0,1] premul (e.g. just before storing back to memory).
-static void SK_VECTORCALL clamp_01_premul(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
+SK_RASTER_STAGE(clamp_01_premul) {
a = Sk4f::Max(a, 0.0f);
r = Sk4f::Max(r, 0.0f);
g = Sk4f::Max(g, 0.0f);
r = Sk4f::Min(r, a);
g = Sk4f::Min(g, a);
b = Sk4f::Min(b, a);
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// The default shader produces a constant color (from the SkPaint).
-static void SK_VECTORCALL constant_color(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto color = st->ctx<const SkPM4f*>();
+SK_RASTER_STAGE(constant_color) {
+ auto color = (const SkPM4f*)ctx;
r = color->r();
g = color->g();
b = color->b();
a = color->a();
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// The default transfer mode is srcover, s' = s + d*(1-sa).
-static void SK_VECTORCALL srcover(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto A = 1.0f - a;
- r += dr*A;
- g += dg*A;
- b += db*A;
- a += da*A;
- st->next(x, r,g,b,a, dr,dg,db,da);
+SK_RASTER_STAGE(srcover) {
+ r += dr*(1.0f - a);
+ g += dg*(1.0f - a);
+ b += db*(1.0f - a);
+ a += da*(1.0f - a);
}
static Sk4f lerp(const Sk4f& from, const Sk4f& to, const Sk4f& cov) {
}
// s' = d(1-c) + sc, for a constant c.
-static void SK_VECTORCALL lerp_constant_float(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- Sk4f c = *st->ctx<const float*>();
+SK_RASTER_STAGE(lerp_constant_float) {
+ Sk4f c = *(const float*)ctx;
r = lerp(dr, r, c);
g = lerp(dg, g, c);
b = lerp(db, b, c);
a = lerp(da, a, c);
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// s' = d(1-c) + sc, 4 pixels at a time for 8-bit coverage.
-static void SK_VECTORCALL lerp_a8(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint8_t*>() + x;
+SK_RASTER_STAGE(lerp_a8) {
+ auto ptr = (const uint8_t*)ctx + x;
Sk4f c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f);
r = lerp(dr, r, c);
g = lerp(dg, g, c);
b = lerp(db, b, c);
a = lerp(da, a, c);
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// Tail variant of lerp_a8() handling 1 pixel at a time.
-static void SK_VECTORCALL lerp_a8_1(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint8_t*>() + x;
+SK_RASTER_STAGE(lerp_a8_1) {
+ auto ptr = (const uint8_t*)ctx + x;
Sk4f c = *ptr * (1/255.0f);
r = lerp(dr, r, c);
g = lerp(dg, g, c);
b = lerp(db, b, c);
a = lerp(da, a, c);
- st->next(x, r,g,b,a, dr,dg,db,da);
}
static void from_565(const Sk4h& _565, Sk4f* r, Sk4f* g, Sk4f* b) {
}
// s' = d(1-c) + sc, 4 pixels at a time for 565 coverage.
-static void SK_VECTORCALL lerp_lcd16(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint16_t*>() + x;
+SK_RASTER_STAGE(lerp_lcd16) {
+ auto ptr = (const uint16_t*)ctx + x;
Sk4f cr, cg, cb;
from_565(Sk4h::Load(ptr), &cr, &cg, &cb);
g = lerp(dg, g, cg);
b = lerp(db, b, cb);
a = 1.0f;
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// Tail variant of lerp_lcd16() handling 1 pixel at a time.
-static void SK_VECTORCALL lerp_lcd16_1(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint16_t*>() + x;
+SK_RASTER_STAGE(lerp_lcd16_1) {
+ auto ptr = (const uint16_t*)ctx + x;
Sk4f cr, cg, cb;
from_565({*ptr,0,0,0}, &cr, &cg, &cb);
g = lerp(dg, g, cg);
b = lerp(db, b, cb);
a = 1.0f;
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// Load 4 565 dst pixels.
-static void SK_VECTORCALL load_d_565(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint16_t*>() + x;
+SK_RASTER_STAGE(load_d_565) {
+ auto ptr = (const uint16_t*)ctx + x;
from_565(Sk4h::Load(ptr), &dr,&dg,&db);
da = 1.0f;
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// Load 1 565 dst pixel.
-static void SK_VECTORCALL load_d_565_1(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint16_t*>() + x;
+SK_RASTER_STAGE(load_d_565_1) {
+ auto ptr = (const uint16_t*)ctx + x;
from_565({*ptr,0,0,0}, &dr,&dg,&db);
da = 1.0f;
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// Store 4 565 pixels.
-static void SK_VECTORCALL store_565(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<uint16_t*>() + x;
+SK_RASTER_STAGE(store_565) {
+ auto ptr = (uint16_t*)ctx + x;
to_565(r,g,b).store(ptr);
}
// Store 1 565 pixel.
-static void SK_VECTORCALL store_565_1(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<uint16_t*>() + x;
+SK_RASTER_STAGE(store_565_1) {
+ auto ptr = (uint16_t*)ctx + x;
*ptr = to_565(r,g,b)[0];
}
// Load 4 F16 pixels.
-static void SK_VECTORCALL load_d_f16(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint64_t*>() + x;
+SK_RASTER_STAGE(load_d_f16) {
+ auto ptr = (const uint64_t*)ctx + x;
Sk4h rh, gh, bh, ah;
Sk4h_load4(ptr, &rh, &gh, &bh, &ah);
dg = SkHalfToFloat_finite(gh);
db = SkHalfToFloat_finite(bh);
da = SkHalfToFloat_finite(ah);
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// Load 1 F16 pixel.
-static void SK_VECTORCALL load_d_f16_1(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint64_t*>() + x;
+SK_RASTER_STAGE(load_d_f16_1) {
+ auto ptr = (const uint64_t*)ctx + x;
auto p0 = SkHalfToFloat_finite(ptr[0]);
dr = { p0[0],0,0,0 };
dg = { p0[1],0,0,0 };
db = { p0[2],0,0,0 };
da = { p0[3],0,0,0 };
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// Store 4 F16 pixels.
-static void SK_VECTORCALL store_f16(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<uint64_t*>() + x;
+SK_RASTER_STAGE(store_f16) {
+ auto ptr = (uint64_t*)ctx + x;
Sk4h_store4(ptr, SkFloatToHalf_finite(r), SkFloatToHalf_finite(g),
SkFloatToHalf_finite(b), SkFloatToHalf_finite(a));
}
// Store 1 F16 pixel.
-static void SK_VECTORCALL store_f16_1(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<uint64_t*>() + x;
+SK_RASTER_STAGE(store_f16_1) {
+ auto ptr = (uint64_t*)ctx + x;
SkFloatToHalf_finite({r[0], g[0], b[0], a[0]}).store(ptr);
}
// Load 4 8-bit sRGB pixels from SkPMColor order to RGBA.
-static void SK_VECTORCALL load_d_srgb(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_d_srgb) {
+ auto ptr = (const uint32_t*)ctx + x;
dr = { sk_linear_from_srgb[(ptr[0] >> SK_R32_SHIFT) & 0xff],
sk_linear_from_srgb[(ptr[1] >> SK_R32_SHIFT) & 0xff],
sk_linear_from_srgb[(ptr[3] >> SK_B32_SHIFT) & 0xff] };
da = SkNx_cast<float>(Sk4u::Load(ptr) >> SK_A32_SHIFT) * (1/255.0f);
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// Tail variant of load_d_srgb() handling 1 pixel at a time.
-static void SK_VECTORCALL load_d_srgb_1(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_d_srgb_1) {
+ auto ptr = (const uint32_t*)ctx + x;
dr = { sk_linear_from_srgb[(*ptr >> SK_R32_SHIFT) & 0xff], 0,0,0 };
dg = { sk_linear_from_srgb[(*ptr >> SK_G32_SHIFT) & 0xff], 0,0,0 };
db = { sk_linear_from_srgb[(*ptr >> SK_B32_SHIFT) & 0xff], 0,0,0 };
da = { (1/255.0f) * (*ptr >> SK_A32_SHIFT) , 0,0,0 };
-
- st->next(x, r,g,b,a, dr,dg,db,da);
}
// Write out 4 pixels as 8-bit SkPMColor-order sRGB.
-static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto dst = st->ctx<uint32_t*>() + x;
+SK_RASTER_STAGE(store_srgb) {
+ auto ptr = (uint32_t*)ctx + x;
( sk_linear_to_srgb_noclamp(r) << SK_R32_SHIFT
| sk_linear_to_srgb_noclamp(g) << SK_G32_SHIFT
| sk_linear_to_srgb_noclamp(b) << SK_B32_SHIFT
- | Sk4f_round(255.0f * a) << SK_A32_SHIFT).store(dst);
+ | Sk4f_round(255.0f * a) << SK_A32_SHIFT).store(ptr);
}
// Tail variant of store_srgb() handling 1 pixel at a time.
-static void SK_VECTORCALL store_srgb_1(SkRasterPipeline::Stage* st, size_t x,
- Sk4f r, Sk4f g, Sk4f b, Sk4f a,
- Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
- auto dst = st->ctx<uint32_t*>() + x;
+SK_RASTER_STAGE(store_srgb_1) {
+ auto ptr = (uint32_t*)ctx + x;
Sk4i rgb = sk_linear_to_srgb_noclamp(swizzle_rb_if_bgra({ r[0], g[0], b[0], 0.0f }));
uint32_t rgba;
SkNx_cast<uint8_t>(rgb).store(&rgba);
rgba |= (uint32_t)(255.0f * a[0] + 0.5f) << 24;
- *dst = rgba;
+ *ptr = rgba;
}
static bool supported(const SkImageInfo& info) {
color.premul());
if (!paint.getShader()) {
- blitter->fShader.append(constant_color, &blitter->fPaintColor);
+ blitter->fShader.append<constant_color>(&blitter->fPaintColor);
}
if (!paint.getXfermode()) {
- blitter->fXfermode.append(srcover);
+ blitter->fXfermode.append<srcover>();
}
return blitter;
switch (fDst.info().colorType()) {
case kN32_SkColorType:
if (fDst.info().gammaCloseToSRGB()) {
- p->append(load_d_srgb, load_d_srgb_1, dst);
+ p->append<load_d_srgb, load_d_srgb_1>(dst);
}
break;
case kRGBA_F16_SkColorType:
- p->append(load_d_f16, load_d_f16_1, dst);
+ p->append<load_d_f16, load_d_f16_1>(dst);
break;
case kRGB_565_SkColorType:
- p->append(load_d_565, load_d_565_1, dst);
+ p->append<load_d_565, load_d_565_1>(dst);
break;
default: break;
}
void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p, void* dst) const {
SkASSERT(supported(fDst.info()));
- p->append(clamp_01_premul);
+ p->append<clamp_01_premul>();
switch (fDst.info().colorType()) {
case kN32_SkColorType:
if (fDst.info().gammaCloseToSRGB()) {
- p->append(store_srgb, store_srgb_1, dst);
+ p->append<store_srgb, store_srgb_1>(dst);
}
break;
case kRGBA_F16_SkColorType:
- p->append(store_f16, store_f16_1, dst);
+ p->append<store_f16, store_f16_1>(dst);
break;
case kRGB_565_SkColorType:
- p->append(store_565, store_565_1, dst);
+ p->append<store_565, store_565_1>(dst);
break;
default: break;
}
p.extend(fColorFilter);
this->append_load_d(&p, dst);
p.extend(fXfermode);
- p.append(lerp_constant_float, &coverage);
+ p.append<lerp_constant_float>(&coverage);
this->append_store(&p, dst);
for (int16_t run = *runs; run > 0; run = *runs) {
p.extend(fXfermode);
switch (mask.fFormat) {
case SkMask::kA8_Format:
- p.append(lerp_a8, lerp_a8_1, mask.getAddr8(x,y)-x);
+ p.append<lerp_a8, lerp_a8_1>(mask.getAddr8(x,y)-x);
break;
case SkMask::kLCD16_Format:
- p.append(lerp_lcd16, lerp_lcd16_1, mask.getAddrLCD16(x,y)-x);
+ p.append<lerp_lcd16, lerp_lcd16_1>(mask.getAddrLCD16(x,y)-x);
break;
default: break;
}
#include "SkRasterPipeline.h"
// load needs two variants, one to load 4 values...
-static void SK_VECTORCALL load(SkRasterPipeline::Stage* st, size_t x,
- Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
- Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
- auto ptr = st->ctx<const float*>();
- v0 = Sk4f{ptr[x+0]};
- v1 = Sk4f{ptr[x+1]};
- v2 = Sk4f{ptr[x+2]};
- v3 = Sk4f{ptr[x+3]};
-
- st->next(x, v0,v1,v2,v3, v4,v5,v6,v7);
+SK_RASTER_STAGE(load) {
+ auto ptr = (const float*)ctx + x;
+ r = Sk4f{ptr[0]};
+ g = Sk4f{ptr[1]};
+ b = Sk4f{ptr[2]};
+ a = Sk4f{ptr[3]};
}
// ...and one to load a single value.
-static void SK_VECTORCALL load_tail(SkRasterPipeline::Stage* st, size_t x,
- Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
- Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
- auto ptr = st->ctx<const float*>();
- v0 = Sk4f{ptr[x]};
-
- st->next(x, v0,v1,v2,v3, v4,v5,v6,v7);
+SK_RASTER_STAGE(load_tail) {
+ auto ptr = (const float*)ctx + x;
+ r = Sk4f{*ptr};
}
// square doesn't really care how many of its inputs are active, nor does it need a context.
-static void SK_VECTORCALL square(SkRasterPipeline::Stage* st, size_t x,
- Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
- Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
- v0 *= v0;
- v1 *= v1;
- v2 *= v2;
- v3 *= v3;
- st->next(x, v0,v1,v2,v3, v4,v5,v6,v7);
+SK_RASTER_STAGE(square) {
+ r *= r;
+ g *= g;
+ b *= b;
+ a *= a;
}
-// Like load, store has a _tail variant. It ends the pipeline by returning.
-static void SK_VECTORCALL store(SkRasterPipeline::Stage* st, size_t x,
- Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
- Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
- auto ptr = st->ctx<float*>();
- ptr[x+0] = v0[0];
- ptr[x+1] = v1[0];
- ptr[x+2] = v2[0];
- ptr[x+3] = v3[0];
+// Like load, store has a _tail variant.
+SK_RASTER_STAGE(store) {
+ auto ptr = (float*)ctx + x;
+ ptr[0] = r[0];
+ ptr[1] = g[0];
+ ptr[2] = b[0];
+ ptr[3] = a[0];
}
-static void SK_VECTORCALL store_tail(SkRasterPipeline::Stage* st, size_t x,
- Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
- Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
- auto ptr = st->ctx<float*>();
- ptr[x+0] = v0[0];
+SK_RASTER_STAGE(store_tail) {
+ auto ptr = (float*)ctx + x;
+ *ptr = r[0];
}
DEF_TEST(SkRasterPipeline, r) {
float dst_vals[] = { 0,0,0,0,0 };
SkRasterPipeline p;
- p.append(load, load_tail, src_vals);
- p.append(square);
- p.append(store, store_tail, dst_vals);
+ p.append<load, load_tail>(src_vals);
+ p.append<square>();
+ p.append<store, store_tail>(dst_vals);
p.run(5);
// No asserts... just a test that this is safe to run and terminates.
// square() always calls st->next(); this makes sure we've always got something there to call.
SkRasterPipeline p;
- p.append(square);
+ p.append<square>();
p.run(20);
}