When not optimizing, define _mm_shuffle_pi16() as a macro that expands
to a GCC statement expression wrapping inline assembly. That way we
avoid __builtin_ia32_pshufw(), which is only available when compiling
with -msse, while still letting gcc see, even without optimization,
that the second argument is a compile-time constant.
Tested-by: Knut Petersen <knut_petersen@t-online.de>
return ret;
}
# else
-# define _mm_shuffle_pi16(A, N) \
- ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
+# define _mm_shuffle_pi16(A, N) \
+ ({ \
+ __m64 ret; \
+ \
+ asm ("pshufw %2, %1, %0\n\t" \
+ : "=y" (ret) \
+ : "y" (A), "K" ((const int8_t)N) \
+ ); \
+ \
+ ret; \
+ })
# endif
# endif
#endif