iwMMXt which has an unsigned saturation pack instruction, while MMX/EXT
and Loongson don't.
ARM/iwMMXt:
src_8888_0565 = L1: 110.38 L2: 82.33 M: 40.92 ( 73.22%) HT: 35.63 VT: 32.22 R: 30.07 RT: 18.40 ( 132Kops/s)
src_8888_0565 = L1: 117.91 L2: 83.05 M: 41.52 ( 75.58%) HT: 37.63 VT: 35.40 R: 29.37 RT: 19.39 ( 134Kops/s)
t1 = _mm_or_si64 (t1, g1);
t0 = shift(t0, -5);
+#ifdef USE_ARM_IWMMXT
+ t1 = shift(t1, -5);
+ return _mm_packs_pu32 (t0, t1);
+#else
t1 = shift(t1, -5 + 16);
-
return _mm_shuffle_pi16 (_mm_or_si64 (t0, t1), _MM_SHUFFLE (3, 1, 2, 0));
+#endif
}
#ifndef _MSC_VER