From: Matt Turner
Date: Mon, 21 May 2012 09:56:58 +0000 (-0400)
Subject: loongson: optimize _mm_set_pi* functions with shuffle instructions
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ef99f9e97260cc55678385a6d691c195f57bd6b1;p=platform%2Fupstream%2Fpixman.git

loongson: optimize _mm_set_pi* functions with shuffle instructions
---

diff --git a/pixman/loongson-mmintrin.h b/pixman/loongson-mmintrin.h
index f0931ac..086c6e0 100644
--- a/pixman/loongson-mmintrin.h
+++ b/pixman/loongson-mmintrin.h
@@ -182,9 +182,32 @@ _mm_packs_pi32 (__m64 __m1, __m64 __m2)
 	return ret;
 }
 
+/* Pack four 2-bit lane selectors into the immediate handed to pshufh:
+ * fp3 lands in the top two bits, fp0 in the bottom two. */
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0)
 {
+	/* Four identical halfwords that are not all compile-time constants:
+	 * place one copy in the low halfword and let pshufh select it for
+	 * every lane.  All-constant arguments skip this block and use the
+	 * plain shift/or packing below. */
+	if (!(__builtin_constant_p (__w3) &&
+	      __builtin_constant_p (__w2) &&
+	      __builtin_constant_p (__w1) &&
+	      __builtin_constant_p (__w0)) &&
+	    __w3 == __w2 && __w2 == __w1 && __w1 == __w0)
+	{
+		/* TODO: handle other cases */
+		uint64_t lane = __w3;
+		uint64_t sel = _MM_SHUFFLE (0, 0, 0, 0);
+		__m64 splat;
+		asm("pshufh %0, %1, %2\n\t"
+		    : "=f" (splat)
+		    : "f" (*(__m64 *)&lane), "f" (*(__m64 *)&sel)
+		);
+		return splat;
+	}
 	uint64_t val = ((uint64_t)__w3 << 48)
 		     | ((uint64_t)__w2 << 32)
 		     | ((uint64_t)__w1 << 16)
@@ -195,10 +218,26 @@ _mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_set_pi32 (unsigned __i1, unsigned __i0)
 {
+	/* Two identical words that are not both compile-time constants:
+	 * pshufh selects halfwords 1 and 0 of the 32-bit source for both
+	 * halves of the result.  All-constant arguments skip this block and
+	 * use the plain shift/or packing below. */
+	if (!(__builtin_constant_p (__i1) &&
+	      __builtin_constant_p (__i0)) &&
+	    __i1 == __i0)
+	{
+		uint64_t sel = _MM_SHUFFLE (1, 0, 1, 0);
+		__m64 splat;
+		asm("pshufh %0, %1, %2\n\t"
+		    : "=f" (splat)
+		    : "f" (*(__m32 *)&__i1), "f" (*(__m64 *)&sel)
+		);
+		return splat;
+	}
 	uint64_t val = ((uint64_t)__i1 << 32)
 		     | ((uint64_t)__i0 << 0);
 	return *(__m64 *)&val;
 }
+#undef _MM_SHUFFLE
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_shuffle_pi16 (__m64 __m, int64_t __n)