/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkRasterPipeline_opts_DEFINED
#define SkRasterPipeline_opts_DEFINED

#include "SkColorPriv.h"
#include "SkColorLookUpTable.h"
#include "SkColorSpaceXform_A2B.h"
#include "SkColorSpaceXformPriv.h"
#include "SkHalf.h"
#include "SkPM4fPriv.h"
#include "SkRasterPipeline.h"
#include "SkSRGB.h"
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
    static constexpr int N = 8;
#else
    static constexpr int N = 4;
#endif

using SkNf = SkNx<N, float>;
using SkNi = SkNx<N, int>;
using SkNh = SkNx<N, uint16_t>;
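
// Every stage below works on N pixels at a time: 8 when AVX2 is available, 4 otherwise.
// Color channels travel through the pipeline as N-wide float vectors (SkNf), one per channel.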
struct BodyStage;
struct TailStage;

using Body = void(SK_VECTORCALL *)(BodyStage*, size_t,         SkNf,SkNf,SkNf,SkNf,
                                                                SkNf,SkNf,SkNf,SkNf);
using Tail = void(SK_VECTORCALL *)(TailStage*, size_t, size_t, SkNf,SkNf,SkNf,SkNf,
                                                                SkNf,SkNf,SkNf,SkNf);

struct BodyStage { Body next; void* ctx; };
struct TailStage { Tail next; void* ctx; };
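
// A Body stage processes a full group of N pixels starting at x; a Tail stage processes the
// final partial group, with tail (0 < tail < N) telling it how many pixels are actually valid.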
#define SI static inline
// Stages are logically a pipeline, and physically are contiguous in an array.
// To get to the next stage, we just increment our pointer to the next array element.
SI void SK_VECTORCALL next(BodyStage* st, size_t x,
                           SkNf r, SkNf g, SkNf b, SkNf a,
                           SkNf dr, SkNf dg, SkNf db, SkNf da) {
    st->next(st+1, x, r,g,b,a, dr,dg,db,da);
}

SI void SK_VECTORCALL next(TailStage* st, size_t x, size_t tail,
                           SkNf r, SkNf g, SkNf b, SkNf a,
                           SkNf dr, SkNf dg, SkNf db, SkNf da) {
    st->next(st+1, x,tail, r,g,b,a, dr,dg,db,da);
}
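
// Because the call to next() is the last thing each stage does and SK_VECTORCALL keeps the
// eight SkNf arguments in registers, the compiler can usually lower the chain of stages into
// direct jumps rather than full call frames.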
#define STAGE(name, kCallNext)                                                           \
    template <bool kIsTail>                                                              \
    static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail,         \
                                               SkNf&  r, SkNf&  g, SkNf&  b, SkNf&  a,   \
                                               SkNf& dr, SkNf& dg, SkNf& db, SkNf& da);  \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                                  \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,                       \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {                     \
        name##_kernel<false>(st->ctx, x,0, r,g,b,a, dr,dg,db,da);                        \
        if (kCallNext) {                                                                 \
            next(st, x, r,g,b,a, dr,dg,db,da);                                           \
        }                                                                                \
    }                                                                                    \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,                     \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,                       \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {                     \
        name##_kernel<true>(st->ctx, x,tail, r,g,b,a, dr,dg,db,da);                      \
        if (kCallNext) {                                                                 \
            next(st, x,tail, r,g,b,a, dr,dg,db,da);                                      \
        }                                                                                \
    }                                                                                    \
    template <bool kIsTail>                                                              \
    static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail,         \
                                               SkNf&  r, SkNf&  g, SkNf&  b, SkNf&  a,   \
                                               SkNf& dr, SkNf& dg, SkNf& db, SkNf& da)
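
// A stage is then written as just its kernel body.  For example, clamp_0 below is
//
//     STAGE(clamp_0, true) {
//         a = SkNf::Max(a, 0.0f);
//         ...
//     }
//
// which expands into matching Body and Tail overloads of clamp_0 plus the clamp_0_kernel
// they both call; kCallNext controls whether the stage chains into the next stage when done.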
// Many xfermodes apply the same logic to each channel.
#define RGBA_XFERMODE(name)                                                        \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,     \
                                               const SkNf& d, const SkNf& da);    \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                           \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,                \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {              \
        r = name##_kernel(r,a,dr,da);                                             \
        g = name##_kernel(g,a,dg,da);                                             \
        b = name##_kernel(b,a,db,da);                                             \
        a = name##_kernel(a,a,da,da);                                             \
        next(st, x, r,g,b,a, dr,dg,db,da);                                        \
    }                                                                              \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,              \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,                \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {              \
        r = name##_kernel(r,a,dr,da);                                             \
        g = name##_kernel(g,a,dg,da);                                             \
        b = name##_kernel(b,a,db,da);                                             \
        a = name##_kernel(a,a,da,da);                                             \
        next(st, x,tail, r,g,b,a, dr,dg,db,da);                                   \
    }                                                                              \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,     \
                                               const SkNf& d, const SkNf& da)
// Most of the rest apply the same logic to color channels and use srcover's alpha logic.
#define RGB_XFERMODE(name)                                                         \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,     \
                                               const SkNf& d, const SkNf& da);    \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                           \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,                \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {              \
        r = name##_kernel(r,a,dr,da);                                             \
        g = name##_kernel(g,a,dg,da);                                             \
        b = name##_kernel(b,a,db,da);                                             \
        a = a + (da * (1.0f-a));                                                  \
        next(st, x, r,g,b,a, dr,dg,db,da);                                        \
    }                                                                              \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,              \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,                \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {              \
        r = name##_kernel(r,a,dr,da);                                             \
        g = name##_kernel(g,a,dg,da);                                             \
        b = name##_kernel(b,a,db,da);                                             \
        a = a + (da * (1.0f-a));                                                  \
        next(st, x,tail, r,g,b,a, dr,dg,db,da);                                   \
    }                                                                              \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,     \
                                               const SkNf& d, const SkNf& da)
SI SkNf inv(const SkNf& x) { return 1.0f - x; }

SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) {
    return SkNx_fma(to-from, cov, from);
}
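
// lerp(from, to, cov) = from + (to-from)*cov, computed with a single fused multiply-add.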
template <bool kIsTail, typename T>
SI SkNx<N,T> load(size_t tail, const T* src) {
    SkASSERT(kIsTail == (tail > 0));
    // TODO: maskload for 32- and 64-bit T
    if (kIsTail) {
        T buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = src[6];
            case 6: buf[5] = src[5];
            case 5: buf[4] = src[4];
            case 4: buf[3] = src[3];
            case 3: buf[2] = src[2];
            case 2: buf[1] = src[1];
            case 1: buf[0] = src[0];
        }
        return SkNx<N,T>::Load(buf);
    }
    return SkNx<N,T>::Load(src);
}
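
// The deliberately fall-through switch copies exactly `tail` elements into a zeroed buffer.
// E.g. with N=4 and tail=2, cases 2 and 1 run, so lanes 0 and 1 hold src[0..1] and lanes 2,3
// stay zero; the full-width Load then never reads past the end of the caller's row.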
template <bool kIsTail, typename T>
SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
    SkASSERT(kIsTail == (tail > 0));
    // TODO: maskstore for 32- and 64-bit T
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: dst[6] = v[6];
            case 6: dst[5] = v[5];
            case 5: dst[4] = v[4];
            case 4: dst[3] = v[3];
            case 3: dst[2] = v[2];
            case 2: dst[1] = v[1];
            case 1: dst[0] = v[0];
        }
        return;
    }
    v.store(dst);
}
SI void from_565(const SkNh& _565, SkNf* r, SkNf* g, SkNf* b) {
    auto _32_bit = SkNx_cast<int>(_565);

    *r = SkNx_cast<float>(_32_bit & SK_R16_MASK_IN_PLACE) * (1.0f / SK_R16_MASK_IN_PLACE);
    *g = SkNx_cast<float>(_32_bit & SK_G16_MASK_IN_PLACE) * (1.0f / SK_G16_MASK_IN_PLACE);
    *b = SkNx_cast<float>(_32_bit & SK_B16_MASK_IN_PLACE) * (1.0f / SK_B16_MASK_IN_PLACE);
}

SI SkNh to_565(const SkNf& r, const SkNf& g, const SkNf& b) {
    return SkNx_cast<uint16_t>( SkNx_cast<int>(r * SK_R16_MASK + 0.5f) << SK_R16_SHIFT
                              | SkNx_cast<int>(g * SK_G16_MASK + 0.5f) << SK_G16_SHIFT
                              | SkNx_cast<int>(b * SK_B16_MASK + 0.5f) << SK_B16_SHIFT);
}
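
// 565 pixels pack red into 5 bits, green into 6, and blue into 5.  from_565() masks each field
// in place and rescales it to [0,1]; to_565() scales by the field's maximum (31 or 63), rounds
// with +0.5, and shifts each field back into position.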
STAGE(just_return, false) { }

STAGE(trace, true) {
    SkDebugf("%s\n", (const char*)ctx);
}

STAGE(registers, true) {
    auto print = [](const char* name, const SkNf& v) {
        SkDebugf("%s:", name);
        for (int i = 0; i < N; i++) {
            SkDebugf(" %g", v[i]);
        }
        SkDebugf("\n");
    };
    print(" r",  r);  print(" g",  g);  print(" b",  b);  print(" a",  a);
    print("dr", dr);  print("dg", dg);  print("db", db);  print("da", da);
}
STAGE(clamp_0, true) {
    a = SkNf::Max(a, 0.0f);
    r = SkNf::Max(r, 0.0f);
    g = SkNf::Max(g, 0.0f);
    b = SkNf::Max(b, 0.0f);
}
STAGE(clamp_a, true) {
    a = SkNf::Min(a, 1.0f);
    r = SkNf::Min(r, a);
    g = SkNf::Min(g, a);
    b = SkNf::Min(b, a);
}
STAGE(clamp_1, true) {
    a = SkNf::Min(a, 1.0f);
    r = SkNf::Min(r, 1.0f);
    g = SkNf::Min(g, 1.0f);
    b = SkNf::Min(b, 1.0f);
}
STAGE(unpremul, true) {
    r *= a.invert();
    g *= a.invert();
    b *= a.invert();
}
STAGE(premul, true) {
    r *= a;
    g *= a;
    b *= a;
}
STAGE(move_src_dst, true) {
    dr = r;
    dg = g;
    db = b;
    da = a;
}
STAGE(swap_src_dst, true) {
    SkTSwap(r, dr);
    SkTSwap(g, dg);
    SkTSwap(b, db);
    SkTSwap(a, da);
}
// The default shader produces a constant color (from the SkPaint).
STAGE(constant_color, true) {
    auto color = (const SkPM4f*)ctx;
    r = color->r();
    g = color->g();
    b = color->b();
    a = color->a();
}
// s' = sc for a constant c.
STAGE(scale_constant_float, true) {
    SkNf c = *(const float*)ctx;

    r *= c;
    g *= c;
    b *= c;
    a *= c;
}
// s' = d(1-c) + sc, for a constant c.
STAGE(lerp_constant_float, true) {
    SkNf c = *(const float*)ctx;

    r = lerp(dr, r, c);
    g = lerp(dg, g, c);
    b = lerp(db, b, c);
    a = lerp(da, a, c);
}
// s' = sc for 8-bit c.
STAGE(scale_u8, true) {
    auto ptr = *(const uint8_t**)ctx + x;

    SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
    r = r*c;
    g = g*c;
    b = b*c;
    a = a*c;
}
// s' = d(1-c) + sc for 8-bit c.
STAGE(lerp_u8, true) {
    auto ptr = *(const uint8_t**)ctx + x;

    SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
    r = lerp(dr, r, c);
    g = lerp(dg, g, c);
    b = lerp(db, b, c);
    a = lerp(da, a, c);
}
// s' = d(1-c) + sc for 565 c.
STAGE(lerp_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    SkNf cr, cg, cb;
    from_565(load<kIsTail>(tail, ptr), &cr, &cg, &cb);

    r = lerp(dr, r, cr);
    g = lerp(dg, g, cg);
    b = lerp(db, b, cb);
    a = 1.0f;
}
STAGE(load_d_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    from_565(load<kIsTail>(tail, ptr), &dr,&dg,&db);
    da = 1.0f;
}

STAGE(load_s_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    from_565(load<kIsTail>(tail, ptr), &r,&g,&b);
    a = 1.0f;
}

STAGE(store_565, false) {
    auto ptr = *(uint16_t**)ctx + x;
    store<kIsTail>(tail, to_565(r,g,b), ptr);
}
STAGE(load_d_f16, true) {
    auto ptr = *(const uint64_t**)ctx + x;

    SkNh rh, gh, bh, ah;
    if (kIsTail) {
        uint64_t buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = ptr[6];
            case 6: buf[5] = ptr[5];
            case 5: buf[4] = ptr[4];
            case 4: buf[3] = ptr[3];
            case 3: buf[2] = ptr[2];
            case 2: buf[1] = ptr[1];
            case 1: buf[0] = ptr[0];
        }
        SkNh::Load4(buf, &rh, &gh, &bh, &ah);
    } else {
        SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
    }

    dr = SkHalfToFloat_finite_ftz(rh);
    dg = SkHalfToFloat_finite_ftz(gh);
    db = SkHalfToFloat_finite_ftz(bh);
    da = SkHalfToFloat_finite_ftz(ah);
}
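
// Each f16 pixel is four 16-bit half floats packed into a uint64_t.  SkNh::Load4() deinterleaves
// N such pixels into per-channel half vectors, and SkHalfToFloat_finite_ftz() widens them to
// float, assuming finite inputs and flushing denormals to zero.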
STAGE(load_s_f16, true) {
    auto ptr = *(const uint64_t**)ctx + x;

    SkNh rh, gh, bh, ah;
    if (kIsTail) {
        uint64_t buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = ptr[6];
            case 6: buf[5] = ptr[5];
            case 5: buf[4] = ptr[4];
            case 4: buf[3] = ptr[3];
            case 3: buf[2] = ptr[2];
            case 2: buf[1] = ptr[1];
            case 1: buf[0] = ptr[0];
        }
        SkNh::Load4(buf, &rh, &gh, &bh, &ah);
    } else {
        SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
    }

    r = SkHalfToFloat_finite_ftz(rh);
    g = SkHalfToFloat_finite_ftz(gh);
    b = SkHalfToFloat_finite_ftz(bh);
    a = SkHalfToFloat_finite_ftz(ah);
}
STAGE(store_f16, false) {
    auto ptr = *(uint64_t**)ctx + x;

    uint64_t buf[8];
    SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r),
                                      SkFloatToHalf_finite_ftz(g),
                                      SkFloatToHalf_finite_ftz(b),
                                      SkFloatToHalf_finite_ftz(a));
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: ptr[6] = buf[6];
            case 6: ptr[5] = buf[5];
            case 5: ptr[4] = buf[4];
            case 4: ptr[3] = buf[3];
            case 3: ptr[2] = buf[2];
            case 2: ptr[1] = buf[1];
            case 1: ptr[0] = buf[0];
        }
    }
}
STAGE(store_f32, false) {
    auto ptr = *(SkPM4f**)ctx + x;

    SkPM4f buf[8];
    SkNf::Store4(kIsTail ? buf : ptr, r,g,b,a);
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: ptr[6] = buf[6];
            case 6: ptr[5] = buf[5];
            case 5: ptr[4] = buf[4];
            case 4: ptr[3] = buf[3];
            case 3: ptr[2] = buf[2];
            case 2: ptr[1] = buf[1];
            case 1: ptr[0] = buf[0];
        }
    }
}
// Load 8-bit SkPMColor-order sRGB.
STAGE(load_d_srgb, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    dr = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
    dg = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
    db = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
    da = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));
}
STAGE(load_s_srgb, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    r = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
    g = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
    b = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
    a = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));
}
STAGE(store_srgb, false) {
    auto ptr = *(uint32_t**)ctx + x;
    store<kIsTail>(tail, (      sk_linear_to_srgb(r) << SK_R32_SHIFT
                         |      sk_linear_to_srgb(g) << SK_G32_SHIFT
                         |      sk_linear_to_srgb(b) << SK_B32_SHIFT
                         | SkNx_cast<int>(0.5f + 255.0f * a) << SK_A32_SHIFT), (int*)ptr);
}
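
// The srgb stages convert between 8-bit sRGB-encoded channels and the linear floats the rest of
// the pipeline works in: sk_linear_from_srgb_math() decodes on load, sk_linear_to_srgb() encodes
// on store.  Alpha has no transfer curve, so it is simply scaled by 1/255 (or by 255 with
// rounding on the way out).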
STAGE(load_s_8888, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    r = (1/255.0f)*SkNx_cast<float>(to_int((px >>  0) & 0xff));
    g = (1/255.0f)*SkNx_cast<float>(to_int((px >>  8) & 0xff));
    b = (1/255.0f)*SkNx_cast<float>(to_int((px >> 16) & 0xff));
    a = (1/255.0f)*SkNx_cast<float>(to_int( px >> 24        ));
}
STAGE(store_8888, false) {
    auto ptr = *(uint32_t**)ctx + x;
    store<kIsTail>(tail, ( SkNx_cast<int>(255.0f * r + 0.5f) <<  0
                         | SkNx_cast<int>(255.0f * g + 0.5f) <<  8
                         | SkNx_cast<int>(255.0f * b + 0.5f) << 16
                         | SkNx_cast<int>(255.0f * a + 0.5f) << 24 ), (int*)ptr);
}
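
// Unlike the srgb stages above, the 8888 stages apply no transfer function at all: bytes are
// simply rescaled to [0,1] and back, with red in the low byte and alpha in the high byte.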
RGBA_XFERMODE(clear)    { return 0.0f; }
//RGBA_XFERMODE(src)    { return s; }   // This would be a no-op stage, so we just omit it.
RGBA_XFERMODE(dst)      { return d; }

RGBA_XFERMODE(srcatop)  { return s*da + d*inv(sa); }
RGBA_XFERMODE(srcin)    { return s * da; }
RGBA_XFERMODE(srcout)   { return s * inv(da); }
RGBA_XFERMODE(srcover)  { return SkNx_fma(d, inv(sa), s); }
RGBA_XFERMODE(dstatop)  { return srcatop_kernel(d,da,s,sa); }
RGBA_XFERMODE(dstin)    { return srcin_kernel  (d,da,s,sa); }
RGBA_XFERMODE(dstout)   { return srcout_kernel (d,da,s,sa); }
RGBA_XFERMODE(dstover)  { return srcover_kernel(d,da,s,sa); }

RGBA_XFERMODE(modulate) { return s*d; }
RGBA_XFERMODE(multiply) { return s*inv(da) + d*inv(sa) + s*d; }
RGBA_XFERMODE(plus_)    { return s + d; }
RGBA_XFERMODE(screen)   { return s + d - s*d; }
RGBA_XFERMODE(xor_)     { return s*inv(da) + d*inv(sa); }
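
// These kernels are the Porter-Duff formulas written for premultiplied color.  srcover, for
// instance, is s + d*(1-sa), computed with a fused multiply-add; the dst* modes just call the
// matching src* kernel with source and destination swapped.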
RGB_XFERMODE(colorburn) {
    return (d == da  ).thenElse(d + s*inv(da),
           (s == 0.0f).thenElse(s + d*inv(sa),
                                sa*(da - SkNf::Min(da, (da-d)*sa/s)) + s*inv(da) + d*inv(sa)));
}
RGB_XFERMODE(colordodge) {
    return (d == 0.0f).thenElse(d + s*inv(da),
           (s == sa  ).thenElse(s + d*inv(sa),
                                sa*SkNf::Min(da, (d*sa)/(sa - s)) + s*inv(da) + d*inv(sa)));
}
RGB_XFERMODE(darken)     { return s + d - SkNf::Max(s*da, d*sa); }
RGB_XFERMODE(difference) { return s + d - 2.0f*SkNf::Min(s*da,d*sa); }
RGB_XFERMODE(exclusion)  { return s + d - 2.0f*s*d; }
RGB_XFERMODE(hardlight) {
    return s*inv(da) + d*inv(sa)
         + (2.0f*s <= sa).thenElse(2.0f*s*d, sa*da - 2.0f*(da-d)*(sa-s));
}
RGB_XFERMODE(lighten) { return s + d - SkNf::Min(s*da, d*sa); }
RGB_XFERMODE(overlay) { return hardlight_kernel(d,da,s,sa); }
RGB_XFERMODE(softlight) {
    SkNf m  = (da > 0.0f).thenElse(d / da, 0.0f),
         s2 = 2.0f*s,
         m4 = 4.0f*m;

    // The logic forks three ways:
    //    1. dark src?
    //    2. light src, dark dst?
    //    3. light src, light dst?
    SkNf darkSrc = d*(sa + (s2 - sa)*(1.0f - m)),     // Used in case 1.
         darkDst = (m4*m4 + m4)*(m - 1.0f) + 7.0f*m,  // Used in case 2.
         liteDst = m.rsqrt().invert() - m,            // Used in case 3.
         liteSrc = d*sa + da*(s2 - sa) * (4.0f*d <= da).thenElse(darkDst, liteDst);  // Case 2 or 3?
    return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc);            // Case 1 or (2 or 3)?
}
STAGE(luminance_to_alpha, true) {
    a = SK_LUM_COEFF_R*r + SK_LUM_COEFF_G*g + SK_LUM_COEFF_B*b;
    r = g = b = 0;
}
STAGE(matrix_3x4, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[3], fma(b,m[6], m[ 9]))),
         G = fma(r,m[1], fma(g,m[4], fma(b,m[7], m[10]))),
         B = fma(r,m[2], fma(g,m[5], fma(b,m[8], m[11])));
    r = R;
    g = G;
    b = B;
}
STAGE(matrix_4x5, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[4], fma(b,m[ 8], fma(a,m[12], m[16])))),
         G = fma(r,m[1], fma(g,m[5], fma(b,m[ 9], fma(a,m[13], m[17])))),
         B = fma(r,m[2], fma(g,m[6], fma(b,m[10], fma(a,m[14], m[18])))),
         A = fma(r,m[3], fma(g,m[7], fma(b,m[11], fma(a,m[15], m[19]))));
    r = R;
    g = G;
    b = B;
    a = A;
}
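
// Both matrix stages expect their context to be a column-major float array: for matrix_4x5,
// m[0..3] is the column multiplied by r, m[4..7] by g, m[8..11] by b, m[12..15] by a, and
// m[16..19] is the constant (translation) column.  Writing to temporaries R,G,B,A first keeps
// later rows from reading channels that earlier rows have already overwritten.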
SI SkNf parametric(const SkNf& v, const SkColorSpaceTransferFn& p) {
    float result[N];   // Unconstrained powf() doesn't vectorize well...
    for (int i = 0; i < N; i++) {
        float s = v[i];
        result[i] = (s <= p.fD) ? p.fC * s + p.fF
                                : powf(s * p.fA + p.fB, p.fG) + p.fE;
    }
    return SkNf::Load(result);
}
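
// This is the piecewise parametric transfer function used by ICC profiles:
//   f(s) = C*s + F          for s <= D
//   f(s) = (A*s + B)^G + E  otherwise
// evaluated lane by lane because an unconstrained powf() won't vectorize.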
STAGE(parametric_r, true) {
    r = parametric(r, *(const SkColorSpaceTransferFn*)ctx);
}
STAGE(parametric_g, true) {
    g = parametric(g, *(const SkColorSpaceTransferFn*)ctx);
}
STAGE(parametric_b, true) {
    b = parametric(b, *(const SkColorSpaceTransferFn*)ctx);
}
SI SkNf table(const SkNf& v, const SkTableTransferFn& table) {
    float result[N];
    for (int i = 0; i < N; i++) {
        result[i] = interp_lut(v[i], table.fData, table.fSize);
    }
    return SkNf::Load(result);
}
STAGE(table_r, true) {
    r = table(r, *(const SkTableTransferFn*)ctx);
}
STAGE(table_g, true) {
    g = table(g, *(const SkTableTransferFn*)ctx);
}
STAGE(table_b, true) {
    b = table(b, *(const SkTableTransferFn*)ctx);
}
STAGE(color_lookup_table, true) {
    const SkColorLookUpTable* colorLUT = (const SkColorLookUpTable*)ctx;

    float result[3][N];
    for (int i = 0; i < N; ++i) {
        float rgb[3] = { r[i], g[i], b[i] };
        colorLUT->interp3D(rgb, rgb);
        result[0][i] = rgb[0];
        result[1][i] = rgb[1];
        result[2][i] = rgb[2];
    }
    r = SkNf::Load(result[0]);
    g = SkNf::Load(result[1]);
    b = SkNf::Load(result[2]);
}
STAGE(lab_to_xyz, true) {
    const auto lab_l = r * 100.0f;
    const auto lab_a = g * 255.0f - 128.0f;
    const auto lab_b = b * 255.0f - 128.0f;
    auto Y = (lab_l + 16.0f) * (1/116.0f);
    auto X = lab_a * (1/500.0f) + Y;
    auto Z = Y - (lab_b * (1/200.0f));

    const auto X3 = X*X*X;
    X = (X3 > 0.008856f).thenElse(X3, (X - (16/116.0f)) * (1/7.787f));
    const auto Y3 = Y*Y*Y;
    Y = (Y3 > 0.008856f).thenElse(Y3, (Y - (16/116.0f)) * (1/7.787f));
    const auto Z3 = Z*Z*Z;
    Z = (Z3 > 0.008856f).thenElse(Z3, (Z - (16/116.0f)) * (1/7.787f));

    // adjust to D50 illuminant
    X *= 0.96422f;
    Y *= 1.00000f;
    Z *= 0.82521f;

    r = X;
    g = Y;
    b = Z;
}
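
// lab_to_xyz undoes the CIELAB encoding: inputs arrive normalized to [0,1], so L is rescaled to
// [0,100] and a/b to [-128,127]; the shared inverse curve (cube above the 0.008856 threshold,
// linear below) recovers the X/Xn, Y/Yn, Z/Zn ratios, and multiplying by the D50 white point
// (0.96422, 1.0, 0.82521) yields absolute XYZ.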
STAGE(swap_rb, true) {
    SkTSwap(r, b);
}
template <typename Fn>
SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {
    switch (st) {
    #define M(stage) case SkRasterPipeline::stage: return stage;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M
    }
    SkASSERT(false);
    return nullptr;
}
namespace SK_OPTS_NS {

struct Memset16 {
    uint16_t** dst;
    uint16_t val;
    void operator()(size_t x, size_t, size_t n) { sk_memset16(*dst + x, val, n); }
};
struct Memset32 {
    uint32_t** dst;
    uint32_t val;
    void operator()(size_t x, size_t, size_t n) { sk_memset32(*dst + x, val, n); }
};
struct Memset64 {
    uint64_t** dst;
    uint64_t val;
    void operator()(size_t x, size_t, size_t n) { sk_memset64(*dst + x, val, n); }
};
SI std::function<void(size_t, size_t, size_t)>
compile_pipeline(const SkRasterPipeline::Stage* stages, int nstages) {
    if (nstages == 2 && stages[0].stage == SkRasterPipeline::constant_color) {
        SkPM4f src = *(const SkPM4f*)stages[0].ctx;
        void* dst = stages[1].ctx;
        switch (stages[1].stage) {
            case SkRasterPipeline::store_565:
                return Memset16{(uint16_t**)dst, SkPackRGB16(src.r() * SK_R16_MASK + 0.5f,
                                                             src.g() * SK_G16_MASK + 0.5f,
                                                             src.b() * SK_B16_MASK + 0.5f)};
            case SkRasterPipeline::store_srgb:
                return Memset32{(uint32_t**)dst, Sk4f_toS32(src.to4f_pmorder())};

            case SkRasterPipeline::store_f16:
                return Memset64{(uint64_t**)dst, src.toF16()};

            default: break;
        }
    }
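
    // A two-stage pipeline of constant_color followed by a plain store is the common
    // "fill with a solid color" case, so it's special-cased above into a row memset of the
    // pre-packed color.  Everything else falls through to the general interpreter built below.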
    struct Compiled {
        Compiled(const SkRasterPipeline::Stage* stages, int nstages) {
            if (nstages == 0) {
                return;
            }

            fBodyStart = enum_to_Fn<Body>(stages[0].stage);
            fTailStart = enum_to_Fn<Tail>(stages[0].stage);
            for (int i = 0; i < nstages-1; i++) {
                fBody[i].next = enum_to_Fn<Body>(stages[i+1].stage);
                fTail[i].next = enum_to_Fn<Tail>(stages[i+1].stage);
                fBody[i].ctx = fTail[i].ctx = stages[i].ctx;
            }
            fBody[nstages-1].next = just_return;
            fTail[nstages-1].next = just_return;
            fBody[nstages-1].ctx = fTail[nstages-1].ctx = stages[nstages-1].ctx;
        }
        void operator()(size_t x, size_t y, size_t n) {
        #ifdef SK_BUILD_FOR_WINRT
            SkNf v = SkNf();   // Quiets an 'uninitialized variable v' warning on ARM.
        #else
            SkNf v;            // Fastest to start uninitialized.
        #endif

            float dx[] = { 0,1,2,3,4,5,6,7 };
            SkNf X = SkNf(x) + SkNf::Load(dx) + 0.5f,
                 Y = SkNf(y) + 0.5f;

            while (n >= N) {
                fBodyStart(fBody, x, v,v,v,v, X,Y,v,v);
                X += (float)N;
                x += N;
                n -= N;
            }
            if (n) {
                fTailStart(fTail, x,n, v,v,v,v, X,Y,v,v);
            }
        }
        Body fBodyStart = just_return;
        Tail fTailStart = just_return;

        BodyStage fBody[SkRasterPipeline::kMaxStages];
        TailStage fTail[SkRasterPipeline::kMaxStages];

    } fn { stages, nstages };

    return fn;
}
}  // namespace SK_OPTS_NS

#endif//SkRasterPipeline_opts_DEFINED