From 1a2147feadb80f3b0d4c4cb6892498b0bae549fe Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Mon, 7 Dec 2020 20:33:20 +0900 Subject: [PATCH] [VE] Add vsum and vfsum intrinsic instructions Add vsum and vfsum intrinsic instructions and regression tests. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D92938 --- llvm/include/llvm/IR/IntrinsicsVEVL.gen.td | 10 +++ llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td | 10 +++ llvm/test/CodeGen/VE/VELIntrinsics/vfsum.ll | 66 +++++++++++++++++++ llvm/test/CodeGen/VE/VELIntrinsics/vsum.ll | 96 ++++++++++++++++++++++++++++ 4 files changed, 182 insertions(+) create mode 100644 llvm/test/CodeGen/VE/VELIntrinsics/vfsum.ll create mode 100644 llvm/test/CodeGen/VE/VELIntrinsics/vsum.ll diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td index 5e86a1b..d2d9650 100644 --- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td +++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td @@ -1084,3 +1084,13 @@ let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_Mvl : GCCBuiltin<"__builtin let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvml : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvml : GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvl : GCCBuiltin<"__builtin_ve_vl_vsuml_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvml : GCCBuiltin<"__builtin_ve_vl_vsuml_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvl : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvml : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvl : GCCBuiltin<"__builtin_ve_vl_vfsums_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvml : GCCBuiltin<"__builtin_ve_vl_vfsums_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td index 8c1518e..623eadc 100644 --- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td +++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td @@ -1307,3 +1307,13 @@ def : Pat<(int_ve_vl_pvfmksgenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GENAN, def : Pat<(int_ve_vl_pvfmksgenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; def : Pat<(int_ve_vl_pvfmkslenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LENAN, v256f64:$vz, i32:$vl)>; def : Pat<(int_ve_vl_pvfmkslenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwsx_vvl v256f64:$vy, i32:$vl), (VSUMWSXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwsx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWSXvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwzx_vvl v256f64:$vy, i32:$vl), (VSUMWZXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwzx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWZXvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsuml_vvl v256f64:$vy, i32:$vl), (VSUMLvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsuml_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMLvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfsumd_vvl v256f64:$vy, i32:$vl), (VFSUMDvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfsumd_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMDvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfsums_vvl v256f64:$vy, i32:$vl), (VFSUMSvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfsums_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMSvml v256f64:$vy, v256i1:$vm, i32:$vl)>; diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vfsum.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vfsum.ll new file mode 100644 index 0000000..430f9a5 --- /dev/null +++ b/llvm/test/CodeGen/VE/VELIntrinsics/vfsum.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s + +;;; Test vector floating sum intrinsic instructions +;;; +;;; Note: +;;; We test VFSUM*vl and VFSUM*vml instructions. + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vfsumd_vvl(<256 x double> %0) { +; CHECK-LABEL: vfsumd_vvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfsum.d %v0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.vfsumd.vvl(<256 x double> %0, i32 256) + ret <256 x double> %2 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vfsumd.vvl(<256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vfsumd_vvml(<256 x double> %0, <256 x i1> %1) { +; CHECK-LABEL: vfsumd_vvml: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfsum.d %v0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call fast <256 x double> @llvm.ve.vl.vfsumd.vvml(<256 x double> %0, <256 x i1> %1, i32 256) + ret <256 x double> %3 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vfsumd.vvml(<256 x double>, <256 x i1>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vfsums_vvl(<256 x double> %0) { +; CHECK-LABEL: vfsums_vvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfsum.s %v0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.vfsums.vvl(<256 x double> %0, i32 256) + ret <256 x double> %2 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vfsums.vvl(<256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vfsums_vvml(<256 x double> %0, <256 x i1> %1) { +; CHECK-LABEL: vfsums_vvml: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfsum.s %v0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call fast <256 x double> @llvm.ve.vl.vfsums.vvml(<256 x double> %0, <256 x i1> %1, i32 256) + ret <256 x double> %3 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vfsums.vvml(<256 x double>, <256 x i1>, i32) diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vsum.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vsum.ll new file mode 100644 index 0000000..5d26a44 --- /dev/null +++ b/llvm/test/CodeGen/VE/VELIntrinsics/vsum.ll @@ -0,0 +1,96 @@ +; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s + +;;; Test vector sum intrinsic instructions +;;; +;;; Note: +;;; We test VSUM*vl and VSUM*vml instructions. + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vsumwsx_vvl(<256 x double> %0) { +; CHECK-LABEL: vsumwsx_vvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vsum.w.sx %v0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.vsumwsx.vvl(<256 x double> %0, i32 256) + ret <256 x double> %2 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vsumwsx.vvl(<256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vsumwsx_vvml(<256 x double> %0, <256 x i1> %1) { +; CHECK-LABEL: vsumwsx_vvml: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vsum.w.sx %v0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call fast <256 x double> @llvm.ve.vl.vsumwsx.vvml(<256 x double> %0, <256 x i1> %1, i32 256) + ret <256 x double> %3 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vsumwsx.vvml(<256 x double>, <256 x i1>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vsumwzx_vvl(<256 x double> %0) { +; CHECK-LABEL: vsumwzx_vvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vsum.w.zx %v0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.vsumwzx.vvl(<256 x double> %0, i32 256) + ret <256 x double> %2 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vsumwzx.vvl(<256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vsumwzx_vvml(<256 x double> %0, <256 x i1> %1) { +; CHECK-LABEL: vsumwzx_vvml: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vsum.w.zx %v0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call fast <256 x double> @llvm.ve.vl.vsumwzx.vvml(<256 x double> %0, <256 x i1> %1, i32 256) + ret <256 x double> %3 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vsumwzx.vvml(<256 x double>, <256 x i1>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vsuml_vvl(<256 x double> %0) { +; CHECK-LABEL: vsuml_vvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vsum.l %v0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.vsuml.vvl(<256 x double> %0, i32 256) + ret <256 x double> %2 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vsuml.vvl(<256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vsuml_vvml(<256 x double> %0, <256 x i1> %1) { +; CHECK-LABEL: vsuml_vvml: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vsum.l %v0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call fast <256 x double> @llvm.ve.vl.vsuml.vvml(<256 x double> %0, <256 x i1> %1, i32 256) + ret <256 x double> %3 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vsuml.vvml(<256 x double>, <256 x i1>, i32) -- 2.7.4