From 33260cf2c547b8c23eddcc9f95b6db2e828a55cf Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Fri, 17 Mar 2023 12:00:05 -0400
Subject: [PATCH] Intrinsics: Make vector reduce intrinsics speculatable

---
 llvm/include/llvm/IR/Intrinsics.td                 |   2 +-
 .../Transforms/SpeculativeExecution/spec-calls.ll  | 193 +++++++++++++++++++++
 2 files changed, 194 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 84a4ca2..f42998a 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1966,7 +1966,7 @@ def int_memset_element_unordered_atomic
 
 //===------------------------ Reduction Intrinsics ------------------------===//
 //
-let IntrProperties = [IntrNoMem] in {
+let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
 
   def int_vector_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
                                          [LLVMVectorElementType<0>,
diff --git a/llvm/test/Transforms/SpeculativeExecution/spec-calls.ll b/llvm/test/Transforms/SpeculativeExecution/spec-calls.ll
index 6a693b4..8b40ac5 100644
--- a/llvm/test/Transforms/SpeculativeExecution/spec-calls.ll
+++ b/llvm/test/Transforms/SpeculativeExecution/spec-calls.ll
@@ -109,3 +109,196 @@ b:
 }
 
 declare half @llvm.fptrunc.round.f16.f32(float, metadata)
+
+; CHECK-LABEL: @ifThen_vector_reduce_fadd(
+; CHECK: %reduce = call float @llvm.vector.reduce.fadd.v2f32(float %x, <2 x float> %y)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_fadd(float %x, <2 x float> %y) { ; directives above expect the fadd reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call float @llvm.vector.reduce.fadd.v2f32(float %x, <2 x float> %y) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
+
+; CHECK-LABEL: @ifThen_vector_reduce_fmul(
+; CHECK: %reduce = call float @llvm.vector.reduce.fmul.v2f32(float %x, <2 x float> %y)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_fmul(float %x, <2 x float> %y) { ; directives above expect the fmul reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call float @llvm.vector.reduce.fmul.v2f32(float %x, <2 x float> %y) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare float @llvm.vector.reduce.fmul.v2f32(float, <2 x float>)
+
+; CHECK-LABEL: @ifThen_vector_reduce_add(
+; CHECK: %reduce = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %x)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_add(<2 x i32> %x) { ; directives above expect the add reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %x) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
+
+; CHECK-LABEL: @ifThen_vector_reduce_mul(
+; CHECK: %reduce = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %x)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_mul(<2 x i32> %x) { ; directives above expect the mul reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %x) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
+
+
+; CHECK-LABEL: @ifThen_vector_reduce_and(
+; CHECK: %reduce = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_and(<2 x i32> %x) { ; directives above expect the and reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
+
+; CHECK-LABEL: @ifThen_vector_reduce_or(
+; CHECK: %reduce = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_or(<2 x i32> %x) { ; directives above expect the or reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
+
+; CHECK-LABEL: @ifThen_vector_reduce_xor(
+; CHECK: %reduce = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_xor(<2 x i32> %x) { ; directives above expect the xor reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
+
+; CHECK-LABEL: @ifThen_vector_reduce_smax(
+; CHECK: %reduce = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %x)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_smax(<2 x i32> %x) { ; directives above expect the smax reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %x) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>) ; NOTE(review): this hunk adds smax/umax/umin tests but no smin test; confirm that is intentional
+
+; CHECK-LABEL: @ifThen_vector_reduce_umax(
+; CHECK: %reduce = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %x)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_umax(<2 x i32> %x) { ; directives above expect the umax reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %x) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
+
+; CHECK-LABEL: @ifThen_vector_reduce_umin(
+; CHECK: %reduce = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %x)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_umin(<2 x i32> %x) { ; directives above expect the umin reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %x) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
+
+; CHECK-LABEL: @ifThen_vector_reduce_fmax(
+; CHECK: %reduce = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_fmax(<2 x float> %x) { ; directives above expect the fmax reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
+
+; CHECK-LABEL: @ifThen_vector_reduce_fmin(
+; CHECK: %reduce = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
+; CHECK-NEXT: br i1 true
+define void @ifThen_vector_reduce_fmin(<2 x float> %x) { ; directives above expect the fmin reduction call directly before the entry branch
+  br i1 true, label %a, label %b ; constant-true condition: %a is the taken target
+
+a: ; holds the reduction call in the input IR
+  %reduce = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x) ; result has no users
+  br label %b
+
+b: ; common exit
+  ret void
+}
+
+declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
-- 
2.7.4