// A pre-multiplied color storing each component in the same order as SkPMColor,
// but as a float in the range [0, 255].
-class SK_STRUCT_ALIGN(16) SkPMFloat {
+class SkPMFloat : public Sk4f {
public:
static SkPMFloat FromPMColor(SkPMColor c) { return SkPMFloat(c); }
static SkPMFloat FromARGB(float a, float r, float g, float b) { return SkPMFloat(a,r,g,b); }
explicit SkPMFloat(SkPMColor);
SkPMFloat(float a, float r, float g, float b)
#ifdef SK_PMCOLOR_IS_RGBA
- : fColors(r,g,b,a) {}
+ : INHERITED(r,g,b,a) {}
#else
- : fColors(b,g,r,a) {}
+ : INHERITED(b,g,r,a) {}
#endif
+ SkPMFloat(const Sk4f& fs) : INHERITED(fs) {}
- // Freely autoconvert between SkPMFloat and Sk4f.
- /*implicit*/ SkPMFloat(const Sk4f& fs) { fColors = fs; }
- /*implicit*/ operator Sk4f() const { return fColors; }
-
- float a() const { return fColors.kth<SK_A32_SHIFT / 8>(); }
- float r() const { return fColors.kth<SK_R32_SHIFT / 8>(); }
- float g() const { return fColors.kth<SK_G32_SHIFT / 8>(); }
- float b() const { return fColors.kth<SK_B32_SHIFT / 8>(); }
+ float a() const { return this->kth<SK_A32_SHIFT / 8>(); }
+ float r() const { return this->kth<SK_R32_SHIFT / 8>(); }
+ float g() const { return this->kth<SK_G32_SHIFT / 8>(); }
+ float b() const { return this->kth<SK_B32_SHIFT / 8>(); }
// N.B. All methods returning an SkPMColor call SkPMColorAssert on that result before returning.
}
private:
- Sk4f fColors;
+ typedef Sk4f INHERITED;
};
#ifdef SKNX_NO_SIMD
struct SrcATop4f {
// Per-lane blend: dst + (src * dst.a() - dst * src.a()) * inv255.
// The removed (-) lines first copied src/dst into Sk4f locals; the added (+)
// line evaluates the same expression on the SkPMFloat arguments directly.
// NOTE(review): gInv255 and check_as_pmfloat are defined outside this view;
// gInv255 is presumably 1/255 -- confirm.
static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
const Sk4f inv255(gInv255);
- Sk4f s4 = src;
- Sk4f d4 = dst;
- return check_as_pmfloat(d4 + (s4 * Sk4f(dst.a()) - d4 * Sk4f(src.a())) * inv255);
+ return check_as_pmfloat(dst + (src * Sk4f(dst.a()) - dst * Sk4f(src.a())) * inv255);
}
static const bool kFoldCoverageIntoSrcAlpha = true;
static const SkXfermode::Mode kMode = SkXfermode::kSrcATop_Mode;
struct DstATop4f {
// Per-lane blend: src + (dst * src.a() - src * dst.a()) * inv255.
// The removed (-) lines first copied src/dst into Sk4f locals; the added (+)
// line evaluates the same expression on the SkPMFloat arguments directly.
// NOTE(review): gInv255 and check_as_pmfloat are defined outside this view;
// gInv255 is presumably 1/255 -- confirm.
static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
const Sk4f inv255(gInv255);
- Sk4f s4 = src;
- Sk4f d4 = dst;
- return check_as_pmfloat(s4 + (d4 * Sk4f(src.a()) - s4 * Sk4f(dst.a())) * inv255);
+ return check_as_pmfloat(src + (dst * Sk4f(src.a()) - src * Sk4f(dst.a())) * inv255);
}
static const bool kFoldCoverageIntoSrcAlpha = false;
static const SkXfermode::Mode kMode = SkXfermode::kDstATop_Mode;
struct Xor4f {
// Per-lane blend: src + dst - (src * dst.a() + dst * src.a()) * inv255.
// The removed (-) lines first copied src/dst into Sk4f locals; the added (+)
// line evaluates the same expression on the SkPMFloat arguments directly.
// NOTE(review): gInv255 and check_as_pmfloat are defined outside this view;
// gInv255 is presumably 1/255 -- confirm.
static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
const Sk4f inv255(gInv255);
- Sk4f s4 = src;
- Sk4f d4 = dst;
- return check_as_pmfloat(s4 + d4 - (s4 * Sk4f(dst.a()) + d4 * Sk4f(src.a())) * inv255);
+ return check_as_pmfloat(src + dst - (src * Sk4f(dst.a()) + dst * Sk4f(src.a())) * inv255);
}
static const bool kFoldCoverageIntoSrcAlpha = true;
static const SkXfermode::Mode kMode = SkXfermode::kXor_Mode;
// kPlus_Mode [Sa + Da, Sc + Dc]
struct Plus4f {
// Per-lane sum of src and dst, passed through clamp_255 before the result is
// handed to check_as_pmfloat.
// The removed (-) lines first copied src/dst into Sk4f locals; the added (+)
// line adds the SkPMFloat arguments directly.
// NOTE(review): clamp_255 is defined outside this view; presumably it caps
// each lane at 255 -- confirm.
static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
- Sk4f s4 = src;
- Sk4f d4 = dst;
- return check_as_pmfloat(clamp_255(s4 + d4));
+ return check_as_pmfloat(clamp_255(src + dst));
}
static const bool kFoldCoverageIntoSrcAlpha = true;
static const SkXfermode::Mode kMode = SkXfermode::kPlus_Mode;
struct Modulate4f {
// Per-lane product: src * dst * inv255.
// The removed (-) lines first copied src/dst into Sk4f locals; the added (+)
// line multiplies the SkPMFloat arguments directly.
// NOTE(review): gInv255 and check_as_pmfloat are defined outside this view;
// gInv255 is presumably 1/255 -- confirm.
static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
const Sk4f inv255(gInv255);
- Sk4f s4 = src;
- Sk4f d4 = dst;
- return check_as_pmfloat(s4 * d4 * inv255);
+ return check_as_pmfloat(src * dst * inv255);
}
static const bool kFoldCoverageIntoSrcAlpha = false;
static const SkXfermode::Mode kMode = SkXfermode::kModulate_Mode;
struct Screen4f {
// Per-lane blend: src + dst - src * dst * inv255.
// The removed (-) lines first copied src/dst into Sk4f locals; the added (+)
// line evaluates the same expression on the SkPMFloat arguments directly.
// NOTE(review): gInv255 and check_as_pmfloat are defined outside this view;
// gInv255 is presumably 1/255 -- confirm.
static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
const Sk4f inv255(gInv255);
- Sk4f s4 = src;
- Sk4f d4 = dst;
- return check_as_pmfloat(s4 + d4 - s4 * d4 * inv255);
+ return check_as_pmfloat(src + dst - src * dst * inv255);
}
static const bool kFoldCoverageIntoSrcAlpha = true;
static const SkXfermode::Mode kMode = SkXfermode::kScreen_Mode;
// Multiplies pm lane-wise by (scale, scale, scale, 1) where scale = 255 / pm.a(),
// so the first three lanes are rescaled and the fourth is left unchanged.
// The removed (-) line converted pm to Sk4f explicitly; the added (+) line
// multiplies the SkPMFloat directly.
// NOTE(review): no guard against pm.a() == 0 is visible here -- confirm callers
// never pass a zero-alpha value.
// NOTE(review): leaving the fourth lane unscaled presumes alpha lives in that
// lane -- confirm against SK_A32_SHIFT.
static Sk4f unpremul(const SkPMFloat& pm) {
float scale = 255 / pm.a(); // candidate for fast/approx invert?
- return Sk4f(pm) * Sk4f(scale, scale, scale, 1);
+ return pm * Sk4f(scale, scale, scale, 1);
}
static Sk4f clamp_0_255(const Sk4f& value) {
typedef SkNi<4, int32_t> Ni;
public:
SkNf(float32x4_t vec) : fVec(vec) {}
- float32x4_t vec() const { return fVec; }
SkNf() {}
explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {}
return vgetq_lane_f32(fVec, k&3);
}
-private:
+protected:
float32x4_t fVec;
};
typedef SkNi<4, int32_t> Ni;
public:
SkNf(const __m128& vec) : fVec(vec) {}
- __m128 vec() const { return fVec; }
SkNf() {}
explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {}
return pun.fs[k&3];
}
-private:
+protected:
__m128 fVec;
};
__m128i fix8 = _mm_set_epi32(0,0,0,c),
fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()),
fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());
- fColors = _mm_cvtepi32_ps(fix8_32);
+ fVec = _mm_cvtepi32_ps(fix8_32);
SkASSERT(this->isValid());
}
inline SkPMColor SkPMFloat::roundClamp() const {
// We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).
- __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors.vec())),
+ __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fVec)),
fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),
fix8 = _mm_packus_epi16(fix8_16, fix8_16);
SkPMColor c = _mm_cvtsi128_si32(fix8);
inline SkPMColor SkPMFloat::trunc() const {
// Basically, same as roundClamp(), but no rounding.
- __m128i fix8_32 = _mm_cvttps_epi32(fColors.vec()),
+ __m128i fix8_32 = _mm_cvttps_epi32(fVec),
fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),
fix8 = _mm_packus_epi16(fix8_16, fix8_16);
SkPMColor c = _mm_cvtsi128_si32(fix8);
SkPMColor colors[4]) {
// Same as _SSSE3.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8.
// We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).
- __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors.vec())),
- c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors.vec())),
- c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors.vec())),
- c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors.vec()));
+ __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fVec)),
+ c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fVec)),
+ c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fVec)),
+ c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fVec));
__m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),
_mm_packus_epi16(c2, c3));
_mm_storeu_si128((__m128i*)colors, c3210);
const int _ = 255; // _ means to zero that byte.
__m128i fix8 = _mm_set_epi32(0,0,0,c),
fix8_32 = _mm_shuffle_epi8(fix8, _mm_set_epi8(_,_,_,3, _,_,_,2, _,_,_,1, _,_,_,0));
- fColors = _mm_cvtepi32_ps(fix8_32);
+ fVec = _mm_cvtepi32_ps(fix8_32);
SkASSERT(this->isValid());
}
inline SkPMColor SkPMFloat::trunc() const {
const int _ = 255; // _ means to zero that byte.
- __m128i fix8_32 = _mm_cvttps_epi32(fColors.vec()),
+ __m128i fix8_32 = _mm_cvttps_epi32(fVec),
fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _,_,_,_, 12,8,4,0));
SkPMColor c = _mm_cvtsi128_si32(fix8);
SkPMColorAssert(c);
inline SkPMColor SkPMFloat::roundClamp() const {
// We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).
- __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors.vec())),
+ __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fVec)),
fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),
fix8 = _mm_packus_epi16(fix8_16, fix8_16);
SkPMColor c = _mm_cvtsi128_si32(fix8);
SkPMColor colors[4]) {
// Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8.
// We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).
- __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors.vec())),
- c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors.vec())),
- c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors.vec())),
- c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors.vec()));
+ __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fVec)),
+ c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fVec)),
+ c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fVec)),
+ c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fVec));
__m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),
_mm_packus_epi16(c2, c3));
_mm_storeu_si128((__m128i*)colors, c3210);
uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c);
uint16x8_t fix8_16 = vmovl_u8(fix8);
uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16));
- fColors = vcvtq_f32_u32(fix8_32);
+ fVec = vcvtq_f32_u32(fix8_32);
SkASSERT(this->isValid());
}
inline SkPMColor SkPMFloat::trunc() const {
- uint32x4_t fix8_32 = vcvtq_u32_f32(fColors.vec()); // vcvtq_u32_f32 truncates
+ uint32x4_t fix8_32 = vcvtq_u32_f32(fVec); // vcvtq_u32_f32 truncates
uint16x4_t fix8_16 = vmovn_u32(fix8_32);
uint8x8_t fix8 = vmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0);
}
inline SkPMColor SkPMFloat::roundClamp() const {
- float32x4_t add_half = vaddq_f32(fColors.vec(), vdupq_n_f32(0.5f));
+ float32x4_t add_half = vaddq_f32(fVec, vdupq_n_f32(0.5f));
uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually
uint16x4_t fix8_16 = vqmovn_u32(fix8_32);
uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));