// This lets us exploit vld4/vst4 and represent SkPx as planar uint8x8x4_t,
// Wide as planar uint16x8x4_t, and Alpha as a single uint8x8_t plane.
-struct SkPx_neon {
+namespace neon {
+
+struct SkPx {
static const int N = 8;
uint8x8x4_t fVec;
- SkPx_neon(uint8x8x4_t vec) : fVec(vec) {}
+ SkPx(uint8x8x4_t vec) : fVec(vec) {}
- static SkPx_neon Dup(uint32_t px) { return vld4_dup_u8((const uint8_t*)&px); }
- static SkPx_neon Load(const uint32_t* px) { return vld4_u8((const uint8_t*)px); }
- static SkPx_neon Load(const uint32_t* px, int n) {
+ static SkPx Dup(uint32_t px) { return vld4_dup_u8((const uint8_t*)&px); }
+ static SkPx Load(const uint32_t* px) { return vld4_u8((const uint8_t*)px); }
+ static SkPx Load(const uint32_t* px, int n) {
SkASSERT(0 < n && n < 8);
uint8x8x4_t v = vld4_dup_u8((const uint8_t*)px); // n>=1, so start all lanes with pixel 0.
        switch (n) {  // Each case falls through, topping up one lane at a time.
            case 7: v = vld4_lane_u8((const uint8_t*)(px+6), v, 6);
            case 6: v = vld4_lane_u8((const uint8_t*)(px+5), v, 5);
            case 5: v = vld4_lane_u8((const uint8_t*)(px+4), v, 4);
            case 4: v = vld4_lane_u8((const uint8_t*)(px+3), v, 3);
            case 3: v = vld4_lane_u8((const uint8_t*)(px+2), v, 2);
            case 2: v = vld4_lane_u8((const uint8_t*)(px+1), v, 1);
        }
        return v;
    }
- SkPx_neon addNarrowHi(const SkPx_neon& o) const {
+ SkPx addNarrowHi(const SkPx& o) const {
return (uint8x8x4_t) {{
vshrn_n_u16(vaddw_u8(fVec.val[0], o.fVec.val[0]), 8),
            vshrn_n_u16(vaddw_u8(fVec.val[1], o.fVec.val[1]), 8),
            vshrn_n_u16(vaddw_u8(fVec.val[2], o.fVec.val[2]), 8),
            vshrn_n_u16(vaddw_u8(fVec.val[3], o.fVec.val[3]), 8),
        }};
    }
Wide widenHi() const { return this->widenLo().shl<8>(); }
Wide widenLoHi() const { return this->widenLo() + this->widenHi(); }
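    // Note: widenLoHi() computes x + (x << 8) == x * 257, e.g. 0xFF -> 0xFFFF,
    // rescaling an 8-bit value onto the full 16-bit range.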
- SkPx_neon operator+(const SkPx_neon& o) const {
+ SkPx operator+(const SkPx& o) const {
return (uint8x8x4_t) {{
vadd_u8(fVec.val[0], o.fVec.val[0]),
            vadd_u8(fVec.val[1], o.fVec.val[1]),
            vadd_u8(fVec.val[2], o.fVec.val[2]),
            vadd_u8(fVec.val[3], o.fVec.val[3]),
}};
}
- SkPx_neon operator-(const SkPx_neon& o) const {
+ SkPx operator-(const SkPx& o) const {
return (uint8x8x4_t) {{
vsub_u8(fVec.val[0], o.fVec.val[0]),
            vsub_u8(fVec.val[1], o.fVec.val[1]),
            vsub_u8(fVec.val[2], o.fVec.val[2]),
            vsub_u8(fVec.val[3], o.fVec.val[3]),
}};
}
- SkPx_neon saturatedAdd(const SkPx_neon& o) const {
+ SkPx saturatedAdd(const SkPx& o) const {
return (uint8x8x4_t) {{
vqadd_u8(fVec.val[0], o.fVec.val[0]),
vqadd_u8(fVec.val[1], o.fVec.val[1]),
            vqadd_u8(fVec.val[2], o.fVec.val[2]),
            vqadd_u8(fVec.val[3], o.fVec.val[3]),
        }};
    }
    Wide operator*(const Alpha& a) const {
        return (uint16x8x4_t) {{
            vmull_u8(fVec.val[0], a.fA),
            vmull_u8(fVec.val[1], a.fA),
            vmull_u8(fVec.val[2], a.fA),
            vmull_u8(fVec.val[3], a.fA),
        }};
    }
- SkPx_neon approxMulDiv255(const Alpha& a) const {
+ SkPx approxMulDiv255(const Alpha& a) const {
return (*this * a).addNarrowHi(*this);
}
- SkPx_neon addAlpha(const Alpha& a) const {
+ SkPx addAlpha(const Alpha& a) const {
return (uint8x8x4_t) {{
fVec.val[0],
            fVec.val[1],
            fVec.val[2],
            vadd_u8(fVec.val[3], a.fA),
        }};
}
};
-typedef SkPx_neon SkPx;
+
+} // namespace neon
+
+typedef neon::SkPx SkPx;
#endif//SkPx_neon_DEFINED
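To see why the planar layout described at the top pays off, here is a minimal standalone sketch (the helper name is hypothetical, not part of SkPx): vld4_u8 deinterleaves eight RGBA pixels into four per-channel planes, so a per-channel operation such as swapping R and B costs one register move per plane.

    #include <arm_neon.h>
    #include <stdint.h>

    // Hypothetical helper: swap the R and B channels of eight RGBA pixels.
    static void swap_rb_8px(uint8_t px[8 * 4]) {
        uint8x8x4_t v = vld4_u8(px);   // val[0]=R, val[1]=G, val[2]=B, val[3]=A planes
        uint8x8_t r = v.val[0];
        v.val[0] = v.val[2];           // swap the whole R and B planes at once
        v.val[2] = r;
        vst4_u8(px, v);                // re-interleave back to rgba rgba ...
    }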
// Nothing fancy here. We're the backup _none case after all.
// Our declared sweet spot is simply a single pixel at a time.
-struct SkPx_none {
+namespace none {
+
+struct SkPx {
static const int N = 1;
uint8_t f8[4];
- SkPx_none(uint32_t px) { memcpy(f8, &px, 4); }
- SkPx_none(uint8_t x, uint8_t y, uint8_t z, uint8_t a) {
+ SkPx(uint32_t px) { memcpy(f8, &px, 4); }
+ SkPx(uint8_t x, uint8_t y, uint8_t z, uint8_t a) {
f8[0] = x; f8[1] = y; f8[2] = z; f8[3] = a;
}
- static SkPx_none Dup(uint32_t px) { return px; }
- static SkPx_none Load(const uint32_t* px) { return *px; }
- static SkPx_none Load(const uint32_t* px, int n) {
+ static SkPx Dup(uint32_t px) { return px; }
+ static SkPx Load(const uint32_t* px) { return *px; }
+ static SkPx Load(const uint32_t* px, int n) {
SkASSERT(false); // There are no 0<n<1.
return 0;
}
return Wide(f16[0]>>bits, f16[1]>>bits, f16[2]>>bits, f16[3]>>bits);
}
- SkPx_none addNarrowHi(const SkPx_none& o) const {
+ SkPx addNarrowHi(const SkPx& o) const {
Wide sum = (*this + o.widenLo()).shr<8>();
- return SkPx_none(sum.f16[0], sum.f16[1], sum.f16[2], sum.f16[3]);
+ return SkPx(sum.f16[0], sum.f16[1], sum.f16[2], sum.f16[3]);
}
};
Wide widenHi() const { return this->widenLo().shl<8>(); }
Wide widenLoHi() const { return this->widenLo() + this->widenHi(); }
- SkPx_none operator+(const SkPx_none& o) const {
- return SkPx_none(f8[0]+o.f8[0], f8[1]+o.f8[1], f8[2]+o.f8[2], f8[3]+o.f8[3]);
+ SkPx operator+(const SkPx& o) const {
+ return SkPx(f8[0]+o.f8[0], f8[1]+o.f8[1], f8[2]+o.f8[2], f8[3]+o.f8[3]);
}
- SkPx_none operator-(const SkPx_none& o) const {
- return SkPx_none(f8[0]-o.f8[0], f8[1]-o.f8[1], f8[2]-o.f8[2], f8[3]-o.f8[3]);
+ SkPx operator-(const SkPx& o) const {
+ return SkPx(f8[0]-o.f8[0], f8[1]-o.f8[1], f8[2]-o.f8[2], f8[3]-o.f8[3]);
}
-    SkPx_none saturatedAdd(const SkPx_none& o) const {
-        return SkPx_none(SkTMax(0, SkTMin(255, f8[0]+o.f8[0])),
-                         SkTMax(0, SkTMin(255, f8[1]+o.f8[1])),
-                         SkTMax(0, SkTMin(255, f8[2]+o.f8[2])),
-                         SkTMax(0, SkTMin(255, f8[3]+o.f8[3])));
+    SkPx saturatedAdd(const SkPx& o) const {
+        return SkPx(SkTMax(0, SkTMin(255, f8[0]+o.f8[0])),
+                    SkTMax(0, SkTMin(255, f8[1]+o.f8[1])),
+                    SkTMax(0, SkTMin(255, f8[2]+o.f8[2])),
+                    SkTMax(0, SkTMin(255, f8[3]+o.f8[3])));
    }
Wide operator*(const Alpha& a) const {
return Wide(f8[0]*a.fA, f8[1]*a.fA, f8[2]*a.fA, f8[3]*a.fA);
}
- SkPx_none approxMulDiv255(const Alpha& a) const {
+ SkPx approxMulDiv255(const Alpha& a) const {
return (*this * a).addNarrowHi(*this);
}
- SkPx_none addAlpha(const Alpha& a) const {
- return SkPx_none(f8[0], f8[1], f8[2], f8[3]+a.fA);
+ SkPx addAlpha(const Alpha& a) const {
+ return SkPx(f8[0], f8[1], f8[2], f8[3]+a.fA);
}
};
-typedef SkPx_none SkPx;
+
+} // namespace none
+
+typedef none::SkPx SkPx;
#endif//SkPx_none_DEFINED
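The none backend doubles as a readable spec for the trickier methods above. In particular, approxMulDiv255 computes (x*a + x) >> 8, i.e. x*(a+1)/256, as a cheap stand-in for x*a/255; it is exact at a = 0 and a = 255 and within 1 everywhere else. A scalar sketch (helper name hypothetical):

    #include <stdint.h>

    // (x*a + x) >> 8 == x*(a+1) >> 8, a cheap approximation of x*a/255.
    static inline uint8_t approx_mul_div_255(uint8_t x, uint8_t a) {
        return (uint8_t)((x*a + x) >> 8);
    }

For example, with x = 200 and a = 128: the exact value is 200*128/255 = 100 (truncated), and the approximation gives (25600 + 200) >> 8 = 25800 >> 8 = 100.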
#ifndef SkPx_sse_DEFINED
#define SkPx_sse_DEFINED
-// SkPx_sse's sweet spot is to work with 4 pixels at a time,
+// sse::SkPx's sweet spot is to work with 4 pixels at a time,
// stored interlaced, just as they sit in memory: rgba rgba rgba rgba.
-// SkPx_sse's best way to work with alphas is similar,
+// sse::SkPx's best way to work with alphas is similar,
// replicating the 4 alphas 4 times each across the pixel: aaaa aaaa aaaa aaaa.
// When working with fewer than 4 pixels, we load the pixels in the low lanes,
// usually filling the top lanes with zeros (but who cares, might be junk).
-struct SkPx_sse {
+namespace sse {
+
+struct SkPx {
static const int N = 4;
__m128i fVec;
- SkPx_sse(__m128i vec) : fVec(vec) {}
+ SkPx(__m128i vec) : fVec(vec) {}
- static SkPx_sse Dup(uint32_t px) { return _mm_set1_epi32(px); }
- static SkPx_sse Load(const uint32_t* px) { return _mm_loadu_si128((const __m128i*)px); }
- static SkPx_sse Load(const uint32_t* px, int n) {
+ static SkPx Dup(uint32_t px) { return _mm_set1_epi32(px); }
+ static SkPx Load(const uint32_t* px) { return _mm_loadu_si128((const __m128i*)px); }
+ static SkPx Load(const uint32_t* px, int n) {
SkASSERT(n > 0 && n < 4);
switch (n) {
            case 1: return _mm_cvtsi32_si128(px[0]);
            case 2: return _mm_loadl_epi64((const __m128i*)px);
            case 3: return _mm_set_epi32(0, px[2], px[1], px[0]);
        }
        return _mm_setzero_si128();  // Unreachable: the assert guarantees 0 < n < 4.
    }
return Wide(_mm_srli_epi16(fLo, bits), _mm_srli_epi16(fHi, bits));
}
- SkPx_sse addNarrowHi(const SkPx_sse& o) const {
+ SkPx addNarrowHi(const SkPx& o) const {
Wide sum = (*this + o.widenLo()).shr<8>();
return _mm_packus_epi16(sum.fLo, sum.fHi);
}
_mm_unpackhi_epi8(fVec, fVec));
}
- SkPx_sse operator+(const SkPx_sse& o) const { return _mm_add_epi8(fVec, o.fVec); }
- SkPx_sse operator-(const SkPx_sse& o) const { return _mm_sub_epi8(fVec, o.fVec); }
- SkPx_sse saturatedAdd(const SkPx_sse& o) const { return _mm_adds_epi8(fVec, o.fVec); }
+ SkPx operator+(const SkPx& o) const { return _mm_add_epi8(fVec, o.fVec); }
+ SkPx operator-(const SkPx& o) const { return _mm_sub_epi8(fVec, o.fVec); }
+    // Note: switched to _mm_adds_epu8 here; pixel channels saturate as unsigned
+    // bytes (matching vqadd_u8 in the neon backend), not signed.
+    SkPx saturatedAdd(const SkPx& o) const { return _mm_adds_epu8(fVec, o.fVec); }
Wide operator*(const Alpha& a) const {
        __m128i pLo = _mm_unpacklo_epi8( fVec, _mm_setzero_si128()),
                aLo = _mm_unpacklo_epi8(a.fVec, _mm_setzero_si128()),
                pHi = _mm_unpackhi_epi8( fVec, _mm_setzero_si128()),
                aHi = _mm_unpackhi_epi8(a.fVec, _mm_setzero_si128());
return Wide(_mm_mullo_epi16(pLo, aLo), _mm_mullo_epi16(pHi, aHi));
}
- SkPx_sse approxMulDiv255(const Alpha& a) const {
+ SkPx approxMulDiv255(const Alpha& a) const {
return (*this * a).addNarrowHi(*this);
}
- SkPx_sse addAlpha(const Alpha& a) const {
+ SkPx addAlpha(const Alpha& a) const {
return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF000000)));
}
};
-typedef SkPx_sse SkPx;
+
+} // namespace sse
+
+typedef sse::SkPx SkPx;
#endif//SkPx_sse_DEFINED
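The "aaaa aaaa aaaa aaaa" alpha layout described in the comment above can be built in a single shuffle where SSSE3 is available; the code in this file sticks to older SSE intrinsics, so the following is only a sketch under that assumption (helper name hypothetical). Byte 3 of each little-endian RGBA word holds the alpha, so the mask broadcasts byte indices 3, 7, 11, and 15 across their pixels.

    #include <tmmintrin.h>  // SSSE3

    // Hypothetical helper: rgba rgba rgba rgba -> aaaa aaaa aaaa aaaa.
    static inline __m128i replicate_alphas(__m128i px) {
        const __m128i mask = _mm_set_epi8(15,15,15,15, 11,11,11,11,
                                           7, 7, 7, 7,  3, 3, 3, 3);
        return _mm_shuffle_epi8(px, mask);
    }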