From d7eb917a7cb793f49e16841fc24826b988dd5c8f Mon Sep 17 00:00:00 2001
From: Albion Fung
Date: Wed, 23 Sep 2020 01:17:59 -0500
Subject: [PATCH] [PowerPC] Implementation of 128-bit Binary Vector Mod and Sign Extend builtins

This patch implements 128-bit Binary Vector Mod and Sign Extend builtins
for Power10.

Differential: https://reviews.llvm.org/D87394#inline-815858
---
 clang/include/clang/Basic/BuiltinsPPC.def          | 10 ++++
 clang/lib/Headers/altivec.h                        | 46 +++++++++++++++++
 clang/test/CodeGen/builtins-ppc-p10vector.c        | 18 +++++++
 clang/test/CodeGen/builtins-ppc-p9vector.c         | 29 +++++++++++
 llvm/include/llvm/IR/IntrinsicsPowerPC.td          | 14 ++++++
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp        |  2 +
 llvm/lib/Target/PowerPC/PPCInstrAltivec.td         | 15 ++++--
 llvm/lib/Target/PowerPC/PPCInstrPrefix.td          |  9 ++--
 llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll     | 22 +++++++++
 .../test/CodeGen/PowerPC/p10-vector-sign-extend.ll | 17 +++++++
 llvm/test/CodeGen/PowerPC/p9-vector-sign-extend.ll | 57 ++++++++++++++++++++++
 11 files changed, 231 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/p9-vector-sign-extend.ll

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index b571454..5de3584 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -303,6 +303,16 @@ BUILTIN(__builtin_altivec_vrldmi, "V2ULLiV2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "")
 BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "")
 
+// P9 Vector extend sign builtins.
+BUILTIN(__builtin_altivec_vextsb2w, "V4SiV16Sc", "")
+BUILTIN(__builtin_altivec_vextsb2d, "V2SLLiV16Sc", "")
+BUILTIN(__builtin_altivec_vextsh2w, "V4SiV8Ss", "")
+BUILTIN(__builtin_altivec_vextsh2d, "V2SLLiV8Ss", "")
+BUILTIN(__builtin_altivec_vextsw2d, "V2SLLiV4Si", "")
+
+// P10 Vector extend sign builtins.
+BUILTIN(__builtin_altivec_vextsd2q, "V1SLLLiV2SLLi", "")
+
 // P10 Vector Extract with Mask built-ins.
 BUILTIN(__builtin_altivec_vextractbm, "UiV16Uc", "")
 BUILTIN(__builtin_altivec_vextracthm, "UiV8Us", "")
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 2c09e47..b07e45d 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -3007,6 +3007,42 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a,
 
 #define vec_vctuxs __builtin_altivec_vctuxs
 
+/* vec_signext */
+
+#ifdef __POWER9_VECTOR__
+static __inline__ vector signed int __ATTRS_o_ai
+vec_signexti(vector signed char __a) {
+  return __builtin_altivec_vextsb2w(__a);
+}
+
+static __inline__ vector signed int __ATTRS_o_ai
+vec_signexti(vector signed short __a) {
+  return __builtin_altivec_vextsh2w(__a);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_signextll(vector signed char __a) {
+  return __builtin_altivec_vextsb2d(__a);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_signextll(vector signed short __a) {
+  return __builtin_altivec_vextsh2d(__a);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_signextll(vector signed int __a) {
+  return __builtin_altivec_vextsw2d(__a);
+}
+#endif
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_signextq(vector signed long long __a) {
+  return __builtin_altivec_vextsd2q(__a);
+}
+#endif
+
 /* vec_signed */
 
 static __inline__ vector signed int __ATTRS_o_ai
@@ -17269,6 +17305,16 @@ vec_mod(vector unsigned long long __a, vector unsigned long long __b) {
   return __a % __b;
 }
 
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_mod(vector signed __int128 __a, vector signed __int128 __b) {
+  return __a % __b;
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_mod(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a % __b;
+}
+
 /* vec_sldbi */
 
 #define vec_sldb(__a, __b, __c) __builtin_altivec_vsldbi(__a, __b, (__c & 0x7))
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index 89f49ad..b6788d7 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -1481,3 +1481,21 @@ vector unsigned __int128 test_vec_xl_zext_i64(void) {
   // CHECK: ret <1 x i128>
   return vec_xl_zext(llb, ullap);
 }
+
+vector signed __int128 test_vec_signextq_s128(void) {
+  // CHECK: @llvm.ppc.altivec.vextsd2q(<2 x i64>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_signextq(vslla);
+}
+
+vector unsigned __int128 test_vec_mod_u128(void) {
+  // CHECK: urem <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_mod(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_mod_s128(void) {
+  // CHECK: srem <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_mod(vsi128a, vsi128b);
+}
diff --git a/clang/test/CodeGen/builtins-ppc-p9vector.c b/clang/test/CodeGen/builtins-ppc-p9vector.c
index e920cb7..0fbcdc5 100644
--- a/clang/test/CodeGen/builtins-ppc-p9vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p9vector.c
@@ -1227,3 +1227,32 @@ vector unsigned long long test119(void) {
   return vec_extract4b(vuca, -5);
 }
 
+vector signed int test_vec_signexti_si_sc(void) {
+  // CHECK: @llvm.ppc.altivec.vextsb2w(<16 x i8>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_signexti(vsca);
+}
+
+vector signed int test_vec_signexti_si_ss(void) {
+  // CHECK: @llvm.ppc.altivec.vextsh2w(<8 x i16>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_signexti(vssa);
+}
+
+vector signed long long test_vec_signextll_sll_sc(void) {
+  // CHECK: @llvm.ppc.altivec.vextsb2d(<16 x i8>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_signextll(vsca);
+}
+
+vector signed long long test_vec_signextll_sll_ss(void) {
+  // CHECK: @llvm.ppc.altivec.vextsh2d(<8 x i16>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_signextll(vssa);
+}
+
+vector signed long long test_vec_signextll_sll_si(void) {
+  // CHECK: @llvm.ppc.altivec.vextsw2d(<4 x i32>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_signextll(vsia);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 219c0d56..f2655d4 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -801,6 +801,20 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
             Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty], [IntrNoMem]>;
 
+  // Vector Sign Extension Instructions
+  def int_ppc_altivec_vextsb2w : GCCBuiltin<"__builtin_altivec_vextsb2w">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsb2d : GCCBuiltin<"__builtin_altivec_vextsb2d">,
+      Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsh2w : GCCBuiltin<"__builtin_altivec_vextsh2w">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsh2d : GCCBuiltin<"__builtin_altivec_vextsh2d">,
+      Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsw2d : GCCBuiltin<"__builtin_altivec_vextsw2d">,
+      Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsd2q : GCCBuiltin<"__builtin_altivec_vextsd2q">,
+      Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
   // Other multiplies.
   def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 706ccb6..88ace9e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -888,6 +888,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
+      setOperationAction(ISD::UREM, MVT::v1i128, Legal);
+      setOperationAction(ISD::SREM, MVT::v1i128, Legal);
       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
     }
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 920eeed..3b65f4d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1449,11 +1449,16 @@ def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd",
                            [(set v2i64:$vD, (cttz v2i64:$vB))]>;
 
 // Vector Extend Sign
-def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", []>;
-def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", []>;
-def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>;
-def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>;
-def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>;
+def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w",
+               [(set v4i32:$vD, (int_ppc_altivec_vextsb2w v16i8:$vB))]>;
+def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w",
+               [(set v4i32:$vD, (int_ppc_altivec_vextsh2w v8i16:$vB))]>;
+def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d",
+               [(set v2i64:$vD, (int_ppc_altivec_vextsb2d v16i8:$vB))]>;
+def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d",
+               [(set v2i64:$vD, (int_ppc_altivec_vextsh2d v8i16:$vB))]>;
+def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d",
+               [(set v2i64:$vD, (int_ppc_altivec_vextsw2d v4i32:$vB))]>;
 let isCodeGenOnly = 1 in {
   def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>;
   def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 1f5f93c..9111d61 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1449,11 +1449,14 @@ let Predicates = [IsISA3_1] in {
   def VCMPGTSQ_rec : VCMPo <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>;
   def VCMPGTUQ_rec : VCMPo <647, "vcmpgtuq. $vD, $vA, $vB" , v1i128>;
   def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vmodsq $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vmodsq $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v1i128:$vD, (srem v1i128:$vA, v1i128:$vB))]>;
   def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vmoduq $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vmoduq $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>;
   def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB),
-                                    "vextsd2q $vD, $vB", IIC_VecGeneral, []>;
+                                    "vextsd2q $vD, $vB", IIC_VecGeneral,
+                                    [(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>;
   def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
                                "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
   def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll b/llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
index e4ef038..94a8058 100644
--- a/llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
@@ -10,6 +10,28 @@
 
 ; The vector modulo instructions operate on signed and unsigned words
 ; and doublewords.
+; The following tests check the 128-bit vector modulo instructions
+; vmodsq and vmoduq.
+
+
+define <1 x i128> @test_vmodsq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vmodsq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmodsq v2, v2, v3
+; CHECK-NEXT:    blr
+  %tmp = srem <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmoduq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vmoduq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmoduq v2, v2, v3
+; CHECK-NEXT:    blr
+  %tmp = urem <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
 define <2 x i64> @test_vmodud(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vmodud:
 ; CHECK:       # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll b/llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
new file mode 100644
index 0000000..f4f68cb
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case checks the Power10 vector sign extend intrinsic vextsd2q.
+
+declare <1 x i128> @llvm.ppc.altivec.vextsd2q(<2 x i64>) nounwind readnone
+
+define <1 x i128> @test_vextsd2q(<2 x i64> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsd2q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsd2q v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vextsd2q(<2 x i64> %x)
+  ret <1 x i128> %tmp
+}
diff --git a/llvm/test/CodeGen/PowerPC/p9-vector-sign-extend.ll b/llvm/test/CodeGen/PowerPC/p9-vector-sign-extend.ll
new file mode 100644
index 0000000..36d6b41
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p9-vector-sign-extend.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case checks the Power9 vector sign extend intrinsics.
+
+declare <4 x i32> @llvm.ppc.altivec.vextsb2w(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsb2d(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.ppc.altivec.vextsh2w(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsh2d(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsw2d(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_vextsb2w(<16 x i8> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsb2w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsb2w v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <4 x i32> @llvm.ppc.altivec.vextsb2w(<16 x i8> %x)
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_vextsb2d(<16 x i8> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsb2d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsb2d v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsb2d(<16 x i8> %x)
+  ret <2 x i64> %tmp
+}
+
+define <4 x i32> @test_vextsh2w(<8 x i16> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsh2w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsh2w v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <4 x i32> @llvm.ppc.altivec.vextsh2w(<8 x i16> %x)
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_vextsh2d(<8 x i16> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsh2d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsh2d v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsh2d(<8 x i16> %x)
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @test_vextsw2d(<4 x i32> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsw2d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsw2d v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsw2d(<4 x i32> %x)
+  ret <2 x i64> %tmp
+}
-- 
2.7.4
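
For reference, a minimal usage sketch of the builtins this patch adds. It is not part of the patch itself; it assumes a compiler carrying this patch, a Power10 target (e.g. clang -mcpu=pwr10), and altivec.h included. The function and variable names below are illustrative only.

#include <altivec.h>

// vec_mod on vector __int128 lowers to vmodsq/vmoduq, vec_signextq to
// vextsd2q, and the vec_signexti/vec_signextll overloads to the Power9
// vexts*2* instructions, per the patterns added above.

vector signed __int128 mod_s128(vector signed __int128 a,
                                vector signed __int128 b) {
  return vec_mod(a, b);      // srem on v1i128 -> vmodsq
}

vector unsigned __int128 mod_u128(vector unsigned __int128 a,
                                  vector unsigned __int128 b) {
  return vec_mod(a, b);      // urem on v1i128 -> vmoduq
}

vector signed __int128 extend_d2q(vector signed long long a) {
  return vec_signextq(a);    // vextsd2q (Power10)
}

vector signed long long extend_w2d(vector signed int a) {
  return vec_signextll(a);   // vextsw2d (Power9 and later)
}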