}
Sk4s trans4(tx, ty, tx, ty);
Sk4s scale4(sx, sy, sx, sy);
- Sk4s skew4(kx, ky, kx, ky); // applied to swizzle of src4
+ Sk4s skew4(ky, kx, ky, kx); // multiplied with src4, then the x/y lanes of the product are swapped
count >>= 1;
for (int i = 0; i < count; ++i) {
Sk4s src4 = Sk4s::Load(&src->fX);
- Sk4s swz4(src[0].fY, src[0].fX, src[1].fY, src[1].fX); // need ABCD -> BADC
- (src4 * scale4 + swz4 * skew4 + trans4).store(&dst->fX);
+ (trans4 + src4 * scale4 + SkNx_shuffle<1,0,3,2>(src4 * skew4)).store(&dst->fX);
src += 2;
dst += 2;
}
#undef SHIFT16
#undef SHIFT8
+// Specialization of SkNx_shuffle_impl for 4-lane float vectors with the lane
+// pattern <1,0,3,2>, i.e. swapping the two lanes inside each adjacent pair
+// (ABCD -> BADC). The whole shuffle is done with one vrev64q_f32 on the
+// underlying vector, which reverses the 32-bit elements within each 64-bit half.
+template <>
+inline SkNf<4,float> SkNx_shuffle_impl<SkNf<4,float>, 1,0,3,2>(const SkNf<4,float>& src) {
+ return vrev64q_f32(src.fVec);
+}
+
} // namespace
#endif//SkNx_neon_DEFINED