From 4e8a09d3672702704436112f3aa5611bd79b2690 Mon Sep 17 00:00:00 2001 From: mtklein Date: Thu, 10 Sep 2015 11:18:31 -0700 Subject: [PATCH] Port SkMatrix opts to SkOpts. No changes to the code, just moved around. This will have the effect of enabling vectorized code on ARMv7. Should be no effect on ARMv8 or x86, which would have been vectorized already. nanobench --match mappoints changes on Nexus 5 (ARMv7): _affine: 132 -> 95 _scale: 118 -> 47 _trans: 60 -> 37 A teaser: We should next look at the ABCD->BADC shuffle we've noted that we need in _affine. A quick hack showed doing that optimally is another ~35% speedup on x86. Got to figure out how to do it best on ARM though: that same quick hack was a 2x slowdown there. Good reason to resurrect that SkNx_shuffle() CL! (I believe the answers are vrev64q_f32(v) and _mm_shuffle_ps(v,v, _MM_SHUFFLE(2,3,0,1)), but we should probably find out in another CL.) BUG=skia:4117 Review URL: https://codereview.chromium.org/1320673014 --- src/core/SkMatrix.cpp | 92 ++++------------------------------------ src/core/SkOpts.cpp | 5 +++ src/core/SkOpts.h | 3 ++ src/opts/SkMatrix_opts.h | 106 +++++++++++++++++++++++++++++++++++++++++++++++ src/opts/SkOpts_neon.cpp | 5 +++ 5 files changed, 126 insertions(+), 85 deletions(-) create mode 100644 src/opts/SkMatrix_opts.h diff --git a/src/core/SkMatrix.cpp b/src/core/SkMatrix.cpp index 13a9ac2..b32f372 100644 --- a/src/core/SkMatrix.cpp +++ b/src/core/SkMatrix.cpp @@ -10,6 +10,7 @@ #include "SkRSXform.h" #include "SkString.h" #include "SkNx.h" +#include "SkOpts.h" #include @@ -445,14 +446,14 @@ SkMatrix& SkMatrix::setRSXform(const SkRSXform& xform) { fMat[kMScaleX] = xform.fSCos; fMat[kMSkewX] = -xform.fSSin; fMat[kMTransX] = xform.fTx; - + fMat[kMSkewY] = xform.fSSin; fMat[kMScaleY] = xform.fSCos; fMat[kMTransY] = xform.fTy; - + fMat[kMPersp0] = fMat[kMPersp1] = 0; fMat[kMPersp2] = 1; - + this->setTypeMask(kUnknown_Mask | kOnlyPerspectiveValid_Mask); return *this; } @@ -907,64 +908,11 @@ 
void SkMatrix::Identity_pts(const SkMatrix& m, SkPoint dst[], const SkPoint src[ } void SkMatrix::Trans_pts(const SkMatrix& m, SkPoint dst[], const SkPoint src[], int count) { - SkASSERT(m.getType() <= kTranslate_Mask); - - if (count > 0) { - SkScalar tx = m.getTranslateX(); - SkScalar ty = m.getTranslateY(); - if (count & 1) { - dst->fX = src->fX + tx; - dst->fY = src->fY + ty; - src += 1; - dst += 1; - } - Sk4s trans4(tx, ty, tx, ty); - count >>= 1; - if (count & 1) { - (Sk4s::Load(&src->fX) + trans4).store(&dst->fX); - src += 2; - dst += 2; - } - count >>= 1; - for (int i = 0; i < count; ++i) { - (Sk4s::Load(&src[0].fX) + trans4).store(&dst[0].fX); - (Sk4s::Load(&src[2].fX) + trans4).store(&dst[2].fX); - src += 4; - dst += 4; - } - } + return SkOpts::matrix_translate(m,dst,src,count); } void SkMatrix::Scale_pts(const SkMatrix& m, SkPoint dst[], const SkPoint src[], int count) { - SkASSERT(m.getType() <= (kScale_Mask | kTranslate_Mask)); - - if (count > 0) { - SkScalar tx = m.getTranslateX(); - SkScalar ty = m.getTranslateY(); - SkScalar sx = m.getScaleX(); - SkScalar sy = m.getScaleY(); - if (count & 1) { - dst->fX = src->fX * sx + tx; - dst->fY = src->fY * sy + ty; - src += 1; - dst += 1; - } - Sk4s trans4(tx, ty, tx, ty); - Sk4s scale4(sx, sy, sx, sy); - count >>= 1; - if (count & 1) { - (Sk4s::Load(&src->fX) * scale4 + trans4).store(&dst->fX); - src += 2; - dst += 2; - } - count >>= 1; - for (int i = 0; i < count; ++i) { - (Sk4s::Load(&src[0].fX) * scale4 + trans4).store(&dst[0].fX); - (Sk4s::Load(&src[2].fX) * scale4 + trans4).store(&dst[2].fX); - src += 4; - dst += 4; - } - } + return SkOpts::matrix_scale_translate(m,dst,src,count); } void SkMatrix::Persp_pts(const SkMatrix& m, SkPoint dst[], @@ -996,33 +944,7 @@ void SkMatrix::Persp_pts(const SkMatrix& m, SkPoint dst[], } void SkMatrix::Affine_vpts(const SkMatrix& m, SkPoint dst[], const SkPoint src[], int count) { - SkASSERT(m.getType() != kPerspective_Mask); - - if (count > 0) { - SkScalar tx = 
m.getTranslateX(); - SkScalar ty = m.getTranslateY(); - SkScalar sx = m.getScaleX(); - SkScalar sy = m.getScaleY(); - SkScalar kx = m.getSkewX(); - SkScalar ky = m.getSkewY(); - if (count & 1) { - dst->set(src->fX * sx + src->fY * kx + tx, - src->fX * ky + src->fY * sy + ty); - src += 1; - dst += 1; - } - Sk4s trans4(tx, ty, tx, ty); - Sk4s scale4(sx, sy, sx, sy); - Sk4s skew4(kx, ky, kx, ky); // applied to swizzle of src4 - count >>= 1; - for (int i = 0; i < count; ++i) { - Sk4s src4 = Sk4s::Load(&src->fX); - Sk4s swz4(src[0].fY, src[0].fX, src[1].fY, src[1].fX); // need ABCD -> BADC - (src4 * scale4 + swz4 * skew4 + trans4).store(&dst->fX); - src += 2; - dst += 2; - } - } + return SkOpts::matrix_affine(m,dst,src,count); } const SkMatrix::MapPtsProc SkMatrix::gMapPtsProcs[] = { diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 492fae3..a540bc8 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -14,6 +14,7 @@ #include "SkBlurImageFilter_opts.h" #include "SkColorCubeFilter_opts.h" #include "SkFloatingPoint_opts.h" +#include "SkMatrix_opts.h" #include "SkMorphologyImageFilter_opts.h" #include "SkTextureCompressor_opts.h" #include "SkUtils_opts.h" @@ -58,6 +59,10 @@ namespace SkOpts { decltype(blit_row_color32) blit_row_color32 = sk_default::blit_row_color32; + decltype(matrix_translate) matrix_translate = sk_default::matrix_translate; + decltype(matrix_scale_translate) matrix_scale_translate = sk_default::matrix_scale_translate; + decltype(matrix_affine) matrix_affine = sk_default::matrix_affine; + // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. 
void Init_ssse3(); void Init_sse41(); diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index d73319a..6fa1e46 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -8,6 +8,7 @@ #ifndef SkOpts_DEFINED #define SkOpts_DEFINED +#include "SkMatrix.h" #include "SkTextureCompressor.h" #include "SkTypes.h" #include "SkXfermode.h" @@ -54,6 +55,8 @@ namespace SkOpts { const SkScalar * [2], int, const SkColor*); + + extern SkMatrix::MapPtsProc matrix_translate, matrix_scale_translate, matrix_affine; } #endif//SkOpts_DEFINED diff --git a/src/opts/SkMatrix_opts.h b/src/opts/SkMatrix_opts.h new file mode 100644 index 0000000..3fb2701 --- /dev/null +++ b/src/opts/SkMatrix_opts.h @@ -0,0 +1,106 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkMatrix_opts_DEFINED +#define SkMatrix_opts_DEFINED + +#include "SkMatrix.h" +#include "SkNx.h" + +namespace SK_OPTS_NS { + +static void matrix_translate(const SkMatrix& m, SkPoint* dst, const SkPoint* src, int count) { + SkASSERT(m.getType() <= SkMatrix::kTranslate_Mask); + if (count > 0) { + SkScalar tx = m.getTranslateX(); + SkScalar ty = m.getTranslateY(); + if (count & 1) { + dst->fX = src->fX + tx; + dst->fY = src->fY + ty; + src += 1; + dst += 1; + } + Sk4s trans4(tx, ty, tx, ty); + count >>= 1; + if (count & 1) { + (Sk4s::Load(&src->fX) + trans4).store(&dst->fX); + src += 2; + dst += 2; + } + count >>= 1; + for (int i = 0; i < count; ++i) { + (Sk4s::Load(&src[0].fX) + trans4).store(&dst[0].fX); + (Sk4s::Load(&src[2].fX) + trans4).store(&dst[2].fX); + src += 4; + dst += 4; + } + } +} + +static void matrix_scale_translate(const SkMatrix& m, SkPoint* dst, const SkPoint* src, int count) { + SkASSERT(m.getType() <= (SkMatrix::kScale_Mask | SkMatrix::kTranslate_Mask)); + if (count > 0) { + SkScalar tx = m.getTranslateX(); + SkScalar ty = m.getTranslateY(); + SkScalar sx = m.getScaleX(); + SkScalar sy = m.getScaleY(); + 
if (count & 1) { + dst->fX = src->fX * sx + tx; + dst->fY = src->fY * sy + ty; + src += 1; + dst += 1; + } + Sk4s trans4(tx, ty, tx, ty); + Sk4s scale4(sx, sy, sx, sy); + count >>= 1; + if (count & 1) { + (Sk4s::Load(&src->fX) * scale4 + trans4).store(&dst->fX); + src += 2; + dst += 2; + } + count >>= 1; + for (int i = 0; i < count; ++i) { + (Sk4s::Load(&src[0].fX) * scale4 + trans4).store(&dst[0].fX); + (Sk4s::Load(&src[2].fX) * scale4 + trans4).store(&dst[2].fX); + src += 4; + dst += 4; + } + } +} + +static void matrix_affine(const SkMatrix& m, SkPoint* dst, const SkPoint* src, int count) { + SkASSERT(m.getType() != SkMatrix::kPerspective_Mask); + if (count > 0) { + SkScalar tx = m.getTranslateX(); + SkScalar ty = m.getTranslateY(); + SkScalar sx = m.getScaleX(); + SkScalar sy = m.getScaleY(); + SkScalar kx = m.getSkewX(); + SkScalar ky = m.getSkewY(); + if (count & 1) { + dst->set(src->fX * sx + src->fY * kx + tx, + src->fX * ky + src->fY * sy + ty); + src += 1; + dst += 1; + } + Sk4s trans4(tx, ty, tx, ty); + Sk4s scale4(sx, sy, sx, sy); + Sk4s skew4(kx, ky, kx, ky); // applied to swizzle of src4 + count >>= 1; + for (int i = 0; i < count; ++i) { + Sk4s src4 = Sk4s::Load(&src->fX); + Sk4s swz4(src[0].fY, src[0].fX, src[1].fY, src[1].fX); // need ABCD -> BADC + (src4 * scale4 + swz4 * skew4 + trans4).store(&dst->fX); + src += 2; + dst += 2; + } + } +} + +} // namespace SK_OPTS_NS + +#endif//SkMatrix_opts_DEFINED diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp index 802d83f..a0388b0 100644 --- a/src/opts/SkOpts_neon.cpp +++ b/src/opts/SkOpts_neon.cpp @@ -13,6 +13,7 @@ #include "SkBlurImageFilter_opts.h" #include "SkColorCubeFilter_opts.h" #include "SkFloatingPoint_opts.h" +#include "SkMatrix_opts.h" #include "SkMorphologyImageFilter_opts.h" #include "SkTextureCompressor_opts.h" #include "SkUtils_opts.h" @@ -42,5 +43,9 @@ namespace SkOpts { blit_row_color32 = sk_neon::blit_row_color32; color_cube_filter_span = sk_neon::color_cube_filter_span; + + 
matrix_translate = sk_neon::matrix_translate; + matrix_scale_translate = sk_neon::matrix_scale_translate; + matrix_affine = sk_neon::matrix_affine; } } -- 2.7.4