From 7f6596bb13e85cd11f16c997c9aa62ad5a09376f Mon Sep 17 00:00:00 2001
From: Bill Schmidt
Date: Mon, 9 Jun 2014 03:31:47 +0000
Subject: [PATCH] [PPC64LE] Implement little-endian semantics for vec_sums

The PowerPC vsumsws instruction, accessed via vec_sums, is defined
architecturally with a big-endian bias, in that the second input vector
and the result always reference big-endian element 3 (little-endian
element 0).  For ease of porting, the programmer wants element 3 in
both cases.

To provide these semantics, for little endian we generate a permute of
the second input vector prior to the vsumsws instruction, and a permute
of the result vector following it.

The correctness of this code is tested by the new sums.c test added in
a previous patch, as well as the modifications to
builtins-ppc-altivec.c in the present patch.

llvm-svn: 210449
---
 clang/lib/Headers/altivec.h               | 26 ++++++++++++++++++++++++++
 clang/test/CodeGen/builtins-ppc-altivec.c |  4 ++++
 2 files changed, 30 insertions(+)

diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 65990e0..bda5a0e 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -8398,10 +8398,26 @@ vec_vsum2sws(vector int __a, vector int __b)
 
 /* vec_sums */
 
+/* The vsumsws instruction has a big-endian bias, so that the second
+   input vector and the result always reference big-endian element 3
+   (little-endian element 0).  For ease of porting the programmer
+   wants element 3 in both cases, so for little endian we must perform
+   some permutes.  */
+
 static vector signed int __attribute__((__always_inline__))
 vec_sums(vector signed int __a, vector signed int __b)
 {
+#ifdef __LITTLE_ENDIAN__
+  __b = (vector signed int)
+    vec_perm(__b, __b, (vector unsigned char)
+             (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+  __b = __builtin_altivec_vsumsws(__a, __b);
+  return (vector signed int)
+    vec_perm(__b, __b, (vector unsigned char)
+             (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+#else
   return __builtin_altivec_vsumsws(__a, __b);
+#endif
 }
 
 /* vec_vsumsws */
@@ -8409,7 +8425,17 @@ vec_sums(vector signed int __a, vector signed int __b)
 static vector signed int __attribute__((__always_inline__))
 vec_vsumsws(vector signed int __a, vector signed int __b)
 {
+#ifdef __LITTLE_ENDIAN__
+  __b = (vector signed int)
+    vec_perm(__b, __b, (vector unsigned char)
+             (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+  __b = __builtin_altivec_vsumsws(__a, __b);
+  return (vector signed int)
+    vec_perm(__b, __b, (vector unsigned char)
+             (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+#else
   return __builtin_altivec_vsumsws(__a, __b);
+#endif
 }
 
 /* vec_trunc */
diff --git a/clang/test/CodeGen/builtins-ppc-altivec.c b/clang/test/CodeGen/builtins-ppc-altivec.c
index 8277f5d..c94656e 100644
--- a/clang/test/CodeGen/builtins-ppc-altivec.c
+++ b/clang/test/CodeGen/builtins-ppc-altivec.c
@@ -5155,11 +5155,15 @@ void test6() {
   /* vec_sums */
   res_vi = vec_sums(vi, vi);
 // CHECK: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
 // CHECK-LE: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vi = vec_vsumsws(vi, vi);
 // CHECK: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
 // CHECK-LE: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
 
   /* vec_trunc */
   res_vf = vec_trunc(vf);
-- 
2.7.4
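
Note: as an illustration of the intended semantics (not part of the
patch), a minimal standalone check along the following lines should
print the same value on big- and little-endian targets.  The file name
sums_demo.c is hypothetical; the sketch assumes a PowerPC compiler with
AltiVec enabled (e.g. -maltivec) and GCC/Clang-style vector
initializers and subscripting.

/* sums_demo.c -- hypothetical demo, not part of this patch. */
#include <altivec.h>
#include <stdio.h>

int main(void)
{
  vector signed int a = {1, 2, 3, 4};  /* all elements sum to 10     */
  vector signed int b = {0, 0, 0, 5};  /* element 3 contributes 5    */

  /* vsumsws places the saturated sum (1+2+3+4+5 = 15) in one element
     and zeros the others; with this patch, that element is element 3
     of the result regardless of endianness.  */
  vector signed int r = vec_sums(a, b);

  printf("%d\n", r[3]);  /* expected: 15 on both BE and LE */
  return 0;
}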