From 507ef6d68115ae9e6d884bb36436a1463523d893 Mon Sep 17 00:00:00 2001 From: mtklein Date: Sun, 31 Jan 2016 08:02:47 -0800 Subject: [PATCH] SkNx Load/store: take any pointer. This means we can remove a lot of explicit casts in code that uses SkNx. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1650653002 CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Review URL: https://codereview.chromium.org/1650653002 --- bench/Sk4fBench.cpp | 4 ++-- src/core/SkColor.cpp | 10 ++++---- src/core/SkGeometry.cpp | 4 ++-- src/core/SkGeometry.h | 4 ++-- src/core/SkMipMap.cpp | 4 ++-- src/core/SkNx.h | 16 +++++++++---- src/core/SkRect.cpp | 4 ++-- src/core/SkScan_Hairline.cpp | 8 +++---- src/effects/SkColorMatrixFilter.cpp | 8 +++---- src/effects/gradients/SkLinearGradient.cpp | 6 ++--- src/opts/SkColorCubeFilter_opts.h | 8 +++---- src/opts/SkMatrix_opts.h | 18 +++++++------- src/opts/SkNx_avx.h | 4 ++-- src/opts/SkNx_neon.h | 32 ++++++++++++------------- src/opts/SkNx_sse.h | 38 +++++++++++++++--------------- src/opts/SkXfermode_opts.h | 4 ++-- 16 files changed, 90 insertions(+), 82 deletions(-) diff --git a/bench/Sk4fBench.cpp b/bench/Sk4fBench.cpp index 1daa3f2..712a657 100644 --- a/bench/Sk4fBench.cpp +++ b/bench/Sk4fBench.cpp @@ -33,8 +33,8 @@ struct Sk4fBytesRoundtripBench : public Benchmark { for (int i = 0; i < loops; i++) { uint32_t color = lcg_rand(&seed), back; - auto f = SkNx_cast(Sk4b::Load((const uint8_t*)&color)); - SkNx_cast(f).store((uint8_t*)&back); + auto f = SkNx_cast(Sk4b::Load(&color)); + SkNx_cast(f).store(&back); junk ^= back; } blackhole ^= junk; diff --git a/src/core/SkColor.cpp b/src/core/SkColor.cpp index cf6e0b2..865fe0d 100644 --- a/src/core/SkColor.cpp +++ b/src/core/SkColor.cpp @@ -105,16 +105,16 @@ SkColor SkHSVToColor(U8CPU a, const SkScalar hsv[3]) { #include "SkNx.h" SkPM4f SkPM4f::FromPMColor(SkPMColor c) { - Sk4f value = SkNx_cast(Sk4b::Load((const uint8_t*)&c)); + Sk4f value = SkNx_cast(Sk4b::Load(&c)); SkPM4f c4; - (value * Sk4f(1.0f / 255)).store(c4.fVec); + (value * Sk4f(1.0f / 255)).store(&c4); return c4; } SkColor4f SkColor4f::FromColor(SkColor c) { - Sk4f value = SkNx_shuffle<3,2,1,0>(SkNx_cast(Sk4b::Load((const uint8_t*)&c))); + Sk4f value = SkNx_shuffle<3,2,1,0>(SkNx_cast(Sk4b::Load(&c))); SkColor4f c4; - (value * Sk4f(1.0f / 255)).store(c4.vec()); + (value * Sk4f(1.0f / 255)).store(&c4); return c4; } @@ -138,7 +138,7 @@ SkPM4f SkColor4f::premul() const { #endif SkPM4f pm4; - dst.store(pm4.fVec); + dst.store(&pm4); return pm4; } diff --git a/src/core/SkGeometry.cpp b/src/core/SkGeometry.cpp index 629703a..809ed19 100644 --- a/src/core/SkGeometry.cpp +++ b/src/core/SkGeometry.cpp @@ -11,7 +11,7 @@ static SkVector to_vector(const Sk2s& x) { SkVector vector; - x.store(&vector.fX); + x.store(&vector); return vector; } @@ -1060,7 +1060,7 @@ SkVector SkConic::evalTangentAt(SkScalar t) const { void SkConic::evalAt(SkScalar t, SkPoint* pt, SkVector* tangent) const { SkASSERT(t >= 0 && t <= SK_Scalar1); - + if (pt) { *pt = this->evalAt(t); } diff --git a/src/core/SkGeometry.h b/src/core/SkGeometry.h index 935967e..15f1e55 100644 --- a/src/core/SkGeometry.h +++ b/src/core/SkGeometry.h @@ -12,12 +12,12 @@ #include "SkNx.h" static inline Sk2s from_point(const SkPoint& point) { - return Sk2s::Load(&point.fX); + return Sk2s::Load(&point); } static inline SkPoint to_point(const Sk2s& x) { SkPoint point; - x.store(&point.fX); + x.store(&point); return point; } diff --git a/src/core/SkMipMap.cpp b/src/core/SkMipMap.cpp index e105493..08602b7 100644 --- a/src/core/SkMipMap.cpp +++ b/src/core/SkMipMap.cpp @@ -21,11 +21,11 @@ struct ColorTypeFilter_8888 { typedef uint32_t Type; #if defined(SKNX_IS_FAST) static Sk4h Expand(uint32_t x) { - return SkNx_cast(Sk4b::Load((const uint8_t*)&x)); + return SkNx_cast(Sk4b::Load(&x)); } static uint32_t Compact(const Sk4h& x) { uint32_t r; - SkNx_cast(x).store((uint8_t*)&r); + SkNx_cast(x).store(&r); return r; } #else diff --git a/src/core/SkNx.h b/src/core/SkNx.h index 2bbd495..4cb15df 100644 --- a/src/core/SkNx.h +++ b/src/core/SkNx.h @@ -32,7 +32,8 @@ public: SkNx() {} SkNx(const SkNx& lo, const SkNx& hi) : fLo(lo), fHi(hi) {} SkNx(T val) : fLo(val), fHi(val) {} - static SkNx Load(const T vals[N]) { + static SkNx Load(const void* ptr) { + auto vals = (const T*)ptr; return SkNx(SkNx::Load(vals), SkNx::Load(vals+N/2)); } @@ -43,7 +44,8 @@ public: T i, T j, T k, T l, T m, T n, T o, T p) : fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { REQUIRE(N==16); } - void store(T vals[N]) const { + void store(void* ptr) const { + auto vals = (T*)ptr; fLo.store(vals); fHi.store(vals+N/2); } @@ -108,9 +110,15 @@ class SkNx<1,T> { public: SkNx() {} SkNx(T val) : fVal(val) {} - static SkNx Load(const T vals[1]) { return SkNx(vals[0]); } + static SkNx Load(const void* ptr) { + auto vals = (const T*)ptr; + return SkNx(vals[0]); + } - void store(T vals[1]) const { vals[0] = fVal; } + void store(void* ptr) const { + auto vals = (T*) ptr; + vals[0] = fVal; + } SkNx saturatedAdd(const SkNx& o) const { SkASSERT((T)(~0) > 0); // TODO: support signed T diff --git a/src/core/SkRect.cpp b/src/core/SkRect.cpp index 6c44ea2..f2060f8 100644 --- a/src/core/SkRect.cpp +++ b/src/core/SkRect.cpp @@ -67,7 +67,7 @@ bool SkRect::setBoundsCheck(const SkPoint pts[], int count) { pts += 1; count -= 1; } else { - min = Sk4s::Load(&pts[0].fX); + min = Sk4s::Load(pts); pts += 2; count -= 2; } @@ -76,7 +76,7 @@ bool SkRect::setBoundsCheck(const SkPoint pts[], int count) { count >>= 1; for (int i = 0; i < count; ++i) { - Sk4s xy = Sk4s::Load(&pts->fX); + Sk4s xy = Sk4s::Load(pts); accum = accum * xy; min = Sk4s::Min(min, xy); max = Sk4s::Max(max, xy); diff --git a/src/core/SkScan_Hairline.cpp b/src/core/SkScan_Hairline.cpp index efccfc9..a63220e 100644 --- a/src/core/SkScan_Hairline.cpp +++ b/src/core/SkScan_Hairline.cpp @@ -233,7 +233,7 @@ static void hairquad(const SkPoint pts[3], const SkRegion* clip, Sk2s C = coeff.fC; for (int i = 1; i < lines; ++i) { t = t + dt; - ((A * t + B) * t + C).store(&tmp[i].fX); + ((A * t + B) * t + C).store(&tmp[i]); } tmp[lines] = pts[2]; lineproc(tmp, lines + 1, clip, blitter); @@ -310,7 +310,7 @@ static void hair_cubic(const SkPoint pts[4], const SkRegion* clip, SkBlitter* bl Sk2s D = coeff.fD; for (int i = 1; i < lines; ++i) { t = t + dt; - (((A * t + B) * t + C) * t + D).store(&tmp[i].fX); + (((A * t + B) * t + C) * t + D).store(&tmp[i]); } tmp[lines] = pts[3]; lineproc(tmp, lines + 1, clip, blitter); @@ -319,10 +319,10 @@ static void hair_cubic(const SkPoint pts[4], const SkRegion* clip, SkBlitter* bl static SkRect compute_nocheck_cubic_bounds(const SkPoint pts[4]) { SkASSERT(SkScalarsAreFinite(&pts[0].fX, 8)); - Sk2s min = Sk2s::Load(&pts[0].fX); + Sk2s min = Sk2s::Load(pts); Sk2s max = min; for (int i = 1; i < 4; ++i) { - Sk2s pair = Sk2s::Load(&pts[i].fX); + Sk2s pair = Sk2s::Load(pts+i); min = Sk2s::Min(min, pair); max = Sk2s::Max(max, pair); } diff --git a/src/effects/SkColorMatrixFilter.cpp b/src/effects/SkColorMatrixFilter.cpp index 4bc07b3..a0878a5 100644 --- a/src/effects/SkColorMatrixFilter.cpp +++ b/src/effects/SkColorMatrixFilter.cpp @@ -86,7 +86,7 @@ static Sk4f clamp_0_1(const Sk4f& x) { static SkPMColor round(const Sk4f& x) { SkPMColor c; - SkNx_cast(x * Sk4f(255) + Sk4f(0.5f)).store((uint8_t*)&c); + SkNx_cast(x * Sk4f(255) + Sk4f(0.5f)).store(&c); return c; } @@ -132,7 +132,7 @@ struct SkPMColorAdaptor { return round(c4); } static Sk4f To4f(SkPMColor c) { - return SkNx_cast(Sk4b::Load((const uint8_t*)&c)) * Sk4f(1.0f/255); + return SkNx_cast(Sk4b::Load(&c)) * Sk4f(1.0f/255); } }; void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const { @@ -142,11 +142,11 @@ void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor struct SkPM4fAdaptor { static SkPM4f From4f(const Sk4f& c4) { SkPM4f c; - c4.store(c.fVec); + c4.store(&c); return c; } static Sk4f To4f(const SkPM4f& c) { - return Sk4f::Load(c.fVec); + return Sk4f::Load(&c); } }; void SkColorMatrixFilter::filterSpan4f(const SkPM4f src[], int count, SkPM4f dst[]) const { diff --git a/src/effects/gradients/SkLinearGradient.cpp b/src/effects/gradients/SkLinearGradient.cpp index 8c74427..4a27a35 100644 --- a/src/effects/gradients/SkLinearGradient.cpp +++ b/src/effects/gradients/SkLinearGradient.cpp @@ -133,7 +133,7 @@ SkLinearGradient::LinearGradientContext::LinearGradientContext( const Sk4f scale(1, 1, 1, paintAlpha); for (int i = 0; i < count; ++i) { uint32_t c = SkSwizzle_Color_to_PMColor(shader.fOrigColors[i]); - rec[i].fColor = SkNx_cast(Sk4b::Load((const uint8_t*)&c)) * scale; + rec[i].fColor = SkNx_cast(Sk4b::Load(&c)) * scale; if (i > 0) { SkASSERT(rec[i - 1].fPos <= rec[i].fPos); } @@ -145,7 +145,7 @@ SkLinearGradient::LinearGradientContext::LinearGradientContext( for (int i = 0; i < count; ++i) { SkPMColor pmc = SkPreMultiplyColor(shader.fOrigColors[i]); pmc = SkAlphaMulQ(pmc, alphaScale); - rec[i].fColor = SkNx_cast(Sk4b::Load((const uint8_t*)&pmc)); + rec[i].fColor = SkNx_cast(Sk4b::Load(&pmc)); if (i > 0) { SkASSERT(rec[i - 1].fPos <= rec[i].fPos); } @@ -515,7 +515,7 @@ find_backward(const SkLinearGradient::LinearGradientContext::Rec rec[], float ti template SkPMColor trunc_from_255(const Sk4f& x) { SkPMColor c; - SkNx_cast(x).store((uint8_t*)&c); + SkNx_cast(x).store(&c); if (apply_alpha) { c = SkPreMultiplyARGB(SkGetPackedA32(c), SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c)); diff --git a/src/opts/SkColorCubeFilter_opts.h b/src/opts/SkColorCubeFilter_opts.h index ab8d1d4..e0c4c4a 100644 --- a/src/opts/SkColorCubeFilter_opts.h +++ b/src/opts/SkColorCubeFilter_opts.h @@ -59,10 +59,10 @@ void color_cube_filter_span(const SkPMColor src[], const SkColor lutColor10 = colorCube[ix + i10]; const SkColor lutColor11 = colorCube[ix + i11]; - Sk4f sum = SkNx_cast(Sk4b::Load((const uint8_t*)&lutColor00)) * g0b0; - sum = sum + SkNx_cast(Sk4b::Load((const uint8_t*)&lutColor01)) * g0b1; - sum = sum + SkNx_cast(Sk4b::Load((const uint8_t*)&lutColor10)) * g1b0; - sum = sum + SkNx_cast(Sk4b::Load((const uint8_t*)&lutColor11)) * g1b1; + Sk4f sum = SkNx_cast(Sk4b::Load(&lutColor00)) * g0b0; + sum = sum + SkNx_cast(Sk4b::Load(&lutColor01)) * g0b1; + sum = sum + SkNx_cast(Sk4b::Load(&lutColor10)) * g1b0; + sum = sum + SkNx_cast(Sk4b::Load(&lutColor11)) * g1b1; color = color + sum * Sk4f((float)colorToFactors[x][r]); } if (a != 255) { diff --git a/src/opts/SkMatrix_opts.h b/src/opts/SkMatrix_opts.h index 3fb2701..b3d3f61 100644 --- a/src/opts/SkMatrix_opts.h +++ b/src/opts/SkMatrix_opts.h @@ -27,14 +27,14 @@ static void matrix_translate(const SkMatrix& m, SkPoint* dst, const SkPoint* src Sk4s trans4(tx, ty, tx, ty); count >>= 1; if (count & 1) { - (Sk4s::Load(&src->fX) + trans4).store(&dst->fX); + (Sk4s::Load(src) + trans4).store(dst); src += 2; dst += 2; } count >>= 1; for (int i = 0; i < count; ++i) { - (Sk4s::Load(&src[0].fX) + trans4).store(&dst[0].fX); - (Sk4s::Load(&src[2].fX) + trans4).store(&dst[2].fX); + (Sk4s::Load(src+0) + trans4).store(dst+0); + (Sk4s::Load(src+2) + trans4).store(dst+2); src += 4; dst += 4; } @@ -58,14 +58,14 @@ static void matrix_scale_translate(const SkMatrix& m, SkPoint* dst, const SkPoin Sk4s scale4(sx, sy, sx, sy); count >>= 1; if (count & 1) { - (Sk4s::Load(&src->fX) * scale4 + trans4).store(&dst->fX); + (Sk4s::Load(src) * scale4 + trans4).store(dst); src += 2; dst += 2; } count >>= 1; for (int i = 0; i < count; ++i) { - (Sk4s::Load(&src[0].fX) * scale4 + trans4).store(&dst[0].fX); - (Sk4s::Load(&src[2].fX) * scale4 + trans4).store(&dst[2].fX); + (Sk4s::Load(src+0) * scale4 + trans4).store(dst+0); + (Sk4s::Load(src+2) * scale4 + trans4).store(dst+2); src += 4; dst += 4; } @@ -92,9 +92,9 @@ static void matrix_affine(const SkMatrix& m, SkPoint* dst, const SkPoint* src, i Sk4s skew4(kx, ky, kx, ky); // applied to swizzle of src4 count >>= 1; for (int i = 0; i < count; ++i) { - Sk4s src4 = Sk4s::Load(&src->fX); - Sk4s swz4(src[0].fY, src[0].fX, src[1].fY, src[1].fX); // need ABCD -> BADC - (src4 * scale4 + swz4 * skew4 + trans4).store(&dst->fX); + Sk4s src4 = Sk4s::Load(src); + Sk4s swz4 = SkNx_shuffle<1,0,3,2>(src4); // y0 x0, y1 x1 + (src4 * scale4 + swz4 * skew4 + trans4).store(dst); src += 2; dst += 2; } diff --git a/src/opts/SkNx_avx.h b/src/opts/SkNx_avx.h index f635181..85a2110 100644 --- a/src/opts/SkNx_avx.h +++ b/src/opts/SkNx_avx.h @@ -24,12 +24,12 @@ public: SkNx() {} SkNx(float val) : fVec(_mm256_set1_ps(val)) {} - static SkNx Load(const float vals[8]) { return _mm256_loadu_ps(vals); } + static SkNx Load(const void* ptr) { return _mm256_loadu_ps((const float*)ptr); } SkNx(float a, float b, float c, float d, float e, float f, float g, float h) : fVec(_mm256_setr_ps(a,b,c,d,e,f,g,h)) {} - void store(float vals[8]) const { _mm256_storeu_ps(vals, fVec); } + void store(void* ptr) const { _mm256_storeu_ps((float*)ptr, fVec); } SkNx operator + (const SkNx& o) const { return _mm256_add_ps(fVec, o.fVec); } SkNx operator - (const SkNx& o) const { return _mm256_sub_ps(fVec, o.fVec); } diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index 8adb276..a4b7cd1 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -41,10 +41,10 @@ public: SkNx() {} SkNx(float val) : fVec(vdup_n_f32(val)) {} - static SkNx Load(const float vals[2]) { return vld1_f32(vals); } + static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); } SkNx(float a, float b) { fVec = (float32x2_t) { a, b }; } - void store(float vals[2]) const { vst1_f32(vals, fVec); } + void store(void* ptr) const { vst1_f32((float*)ptr, fVec); } SkNx approxInvert() const { float32x2_t est0 = vrecpe_f32(fVec), @@ -122,10 +122,10 @@ public: SkNx() {} SkNx(int val) : fVec(vdupq_n_s32(val)) {} - static SkNx Load(const int vals[4]) { return vld1q_s32(vals); } + static SkNx Load(const void* ptr) { return vld1q_s32((const int*)ptr); } SkNx(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; } - void store(int vals[4]) const { vst1q_s32(vals, fVec); } + void store(void* ptr) const { vst1q_s32((int*)ptr, fVec); } SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); } SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); } @@ -149,10 +149,10 @@ public: SkNx() {} SkNx(float val) : fVec(vdupq_n_f32(val)) {} - static SkNx Load(const float vals[4]) { return vld1q_f32(vals); } + static SkNx Load(const void* ptr) { return vld1q_f32((const float*)ptr); } SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; } - void store(float vals[4]) const { vst1q_f32(vals, fVec); } + void store(void* ptr) const { vst1q_f32((float*)ptr, fVec); } SkNx approxInvert() const { float32x4_t est0 = vrecpeq_f32(fVec), est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); @@ -240,13 +240,13 @@ public: SkNx() {} SkNx(uint16_t val) : fVec(vdup_n_u16(val)) {} - static SkNx Load(const uint16_t vals[4]) { return vld1_u16(vals); } + static SkNx Load(const void* ptr) { return vld1_u16((const uint16_t*)ptr); } SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) { fVec = (uint16x4_t) { a,b,c,d }; } - void store(uint16_t vals[4]) const { vst1_u16(vals, fVec); } + void store(void* ptr) const { vst1_u16((uint16_t*)ptr, fVec); } SkNx operator + (const SkNx& o) const { return vadd_u16(fVec, o.fVec); } SkNx operator - (const SkNx& o) const { return vsub_u16(fVec, o.fVec); } @@ -276,14 +276,14 @@ public: SkNx() {} SkNx(uint16_t val) : fVec(vdupq_n_u16(val)) {} - static SkNx Load(const uint16_t vals[8]) { return vld1q_u16(vals); } + static SkNx Load(const void* ptr) { return vld1q_u16((const uint16_t*)ptr); } SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, uint16_t e, uint16_t f, uint16_t g, uint16_t h) { fVec = (uint16x8_t) { a,b,c,d, e,f,g,h }; } - void store(uint16_t vals[8]) const { vst1q_u16(vals, fVec); } + void store(void* ptr) const { vst1q_u16((uint16_t*)ptr, fVec); } SkNx operator + (const SkNx& o) const { return vaddq_u16(fVec, o.fVec); } SkNx operator - (const SkNx& o) const { return vsubq_u16(fVec, o.fVec); } @@ -312,11 +312,11 @@ public: SkNx(const uint8x8_t& vec) : fVec(vec) {} SkNx() {} - static SkNx Load(const uint8_t vals[4]) { - return (uint8x8_t)vld1_dup_u32((const uint32_t*)vals); + static SkNx Load(const void* ptr) { + return (uint8x8_t)vld1_dup_u32((const uint32_t*)ptr); } - void store(uint8_t vals[4]) const { - return vst1_lane_u32((uint32_t*)vals, (uint32x2_t)fVec, 0); + void store(void* ptr) const { + return vst1_lane_u32((uint32_t*)ptr, (uint32x2_t)fVec, 0); } // TODO as needed @@ -331,7 +331,7 @@ public: SkNx() {} SkNx(uint8_t val) : fVec(vdupq_n_u8(val)) {} - static SkNx Load(const uint8_t vals[16]) { return vld1q_u8(vals); } + static SkNx Load(const void* ptr) { return vld1q_u8((const uint8_t*)ptr); } SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t e, uint8_t f, uint8_t g, uint8_t h, @@ -340,7 +340,7 @@ public: fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p }; } - void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); } + void store(void* ptr) const { vst1q_u8((uint8_t*)ptr, fVec); } SkNx saturatedAdd(const SkNx& o) const { return vqaddq_u8(fVec, o.fVec); } diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index a17d988..71ecbfd 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -22,12 +22,12 @@ public: SkNx() {} SkNx(float val) : fVec(_mm_set1_ps(val)) {} - static SkNx Load(const float vals[2]) { - return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); + static SkNx Load(const void* ptr) { + return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)ptr)); } SkNx(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} - void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); } + void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); } SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } @@ -71,10 +71,10 @@ public: SkNx() {} SkNx(double val) : fVec(_mm_set1_pd(val)) {} - static SkNx Load(const double vals[2]) { return _mm_loadu_pd(vals); } + static SkNx Load(const void* ptr) { return _mm_loadu_pd((const double*)ptr); } SkNx(double a, double b) : fVec(_mm_setr_pd(a,b)) {} - void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } + void store(void* ptr) const { _mm_storeu_pd((double*)ptr, fVec); } SkNx operator + (const SkNx& o) const { return _mm_add_pd(fVec, o.fVec); } SkNx operator - (const SkNx& o) const { return _mm_sub_pd(fVec, o.fVec); } @@ -117,10 +117,10 @@ public: SkNx() {} SkNx(int val) : fVec(_mm_set1_epi32(val)) {} - static SkNx Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); } + static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); } SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} - void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } + void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } SkNx operator + (const SkNx& o) const { return _mm_add_epi32(fVec, o.fVec); } SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); } @@ -155,11 +155,11 @@ public: SkNx() {} SkNx(float val) : fVec( _mm_set1_ps(val) ) {} - static SkNx Load(const float vals[4]) { return _mm_loadu_ps(vals); } + static SkNx Load(const void* ptr) { return _mm_loadu_ps((const float*)ptr); } SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} - void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } + void store(void* ptr) const { _mm_storeu_ps((float*)ptr, fVec); } SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } @@ -210,10 +210,10 @@ public: SkNx() {} SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} - static SkNx Load(const uint16_t vals[4]) { return _mm_loadl_epi64((const __m128i*)vals); } + static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); } SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a,b,c,d,0,0,0,0)) {} - void store(uint16_t vals[4]) const { _mm_storel_epi64((__m128i*)vals, fVec); } + void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); } SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); } SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); } @@ -237,11 +237,11 @@ public: SkNx() {} SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} - static SkNx Load(const uint16_t vals[8]) { return _mm_loadu_si128((const __m128i*)vals); } + static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); } SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a,b,c,d,e,f,g,h)) {} - void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec); } + void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); } SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); } @@ -278,8 +278,8 @@ public: SkNx(const __m128i& vec) : fVec(vec) {} SkNx() {} - static SkNx Load(const uint8_t vals[4]) { return _mm_cvtsi32_si128(*(const int*)vals); } - void store(uint8_t vals[4]) const { *(int*)vals = _mm_cvtsi128_si32(fVec); } + static SkNx Load(const void* ptr) { return _mm_cvtsi32_si128(*(const int*)ptr); } + void store(void* ptr) const { *(int*)ptr = _mm_cvtsi128_si32(fVec); } // TODO as needed @@ -292,8 +292,8 @@ public: SkNx(const __m128i& vec) : fVec(vec) {} SkNx() {} - static SkNx Load(const uint8_t vals[8]) { return _mm_loadl_epi64((const __m128i*)vals); } - void store(uint8_t vals[8]) const { _mm_storel_epi64((__m128i*)vals, fVec); } + static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); } + void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); } // TODO as needed @@ -307,14 +307,14 @@ public: SkNx() {} SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {} - static SkNx Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); } + static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); } SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t e, uint8_t f, uint8_t g, uint8_t h, uint8_t i, uint8_t j, uint8_t k, uint8_t l, uint8_t m, uint8_t n, uint8_t o, uint8_t p) : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {} - void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec); } + void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec); } diff --git a/src/opts/SkXfermode_opts.h b/src/opts/SkXfermode_opts.h index 7b72bc3..f15094c 100644 --- a/src/opts/SkXfermode_opts.h +++ b/src/opts/SkXfermode_opts.h @@ -285,12 +285,12 @@ private: } static Sk4f Load(SkPMColor c) { - return SkNx_cast(Sk4b::Load((uint8_t*)&c)) * Sk4f(1.0f/255); + return SkNx_cast(Sk4b::Load(&c)) * Sk4f(1.0f/255); } static SkPMColor Round(const Sk4f& f) { SkPMColor c; - SkNx_cast(f * Sk4f(255) + Sk4f(0.5f)).store((uint8_t*)&c); + SkNx_cast(f * Sk4f(255) + Sk4f(0.5f)).store(&c); return c; } -- 2.7.4