AMDGPU: Refine rcp/rsq intrinsic folding for modern FP rules

author Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 22 May 2020 01:04:06 +0000 (21:04 -0400)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Sat, 23 May 2020 17:28:36 +0000 (13:28 -0400)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 22 May 2020 01:04:06 +0000 (21:04 -0400)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Sat, 23 May 2020 17:28:36 +0000 (13:28 -0400)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

index 89ffe06..118013a 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3500,18 +3500,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      Value *Src = II->getArgOperand(0);
  
      // TODO: Move to ConstantFolding/InstSimplify?
-    if (isa<UndefValue>(Src))
-      return replaceInstUsesWith(CI, Src);
+    if (isa<UndefValue>(Src)) {
+      Type *Ty = II->getType();
+      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
+      return replaceInstUsesWith(CI, QNaN);
+    }
+
+    if (II->isStrictFP())
+      break;
  
      if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
        const APFloat &ArgVal = C->getValueAPF();
        APFloat Val(ArgVal.getSemantics(), 1);
-      APFloat::opStatus Status = Val.divide(ArgVal,
-                                            APFloat::rmNearestTiesToEven);
-      // Only do this if it was exact and therefore not dependent on the
-      // rounding mode.
-      if (Status == APFloat::opOK)
-        return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
+      Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
+
+      // This is more precise than the instruction may give.
+      //
+      // TODO: The instruction always flushes denormal results (except for f16),
+      // should this also?
+      return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
      }
  
      break;
@@ -3520,8 +3527,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      Value *Src = II->getArgOperand(0);
  
      // TODO: Move to ConstantFolding/InstSimplify?
-    if (isa<UndefValue>(Src))
-      return replaceInstUsesWith(CI, Src);
+    if (isa<UndefValue>(Src)) {
+      Type *Ty = II->getType();
+      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
+      return replaceInstUsesWith(CI, QNaN);
+    }
+
      break;
    }
    case Intrinsic::amdgcn_frexp_mant:
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

index 032ca08..a3a99e5 100644 (file)
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -10,7 +10,7 @@ declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone
  
  define float @test_constant_fold_rcp_f32_undef() nounwind {
  ; CHECK-LABEL: @test_constant_fold_rcp_f32_undef(
-; CHECK-NEXT:    ret float undef
+; CHECK-NEXT:    ret float 0x7FF8000000000000
  ;
    %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone
    ret float %val
@@ -50,8 +50,7 @@ define double @test_constant_fold_rcp_f64_half() nounwind {
  
  define float @test_constant_fold_rcp_f32_43() nounwind {
  ; CHECK-LABEL: @test_constant_fold_rcp_f32_43(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01)
-; CHECK-NEXT:    ret float [[VAL]]
+; CHECK-NEXT:    ret float 0x3F97D05F40000000
  ;
    %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone
    ret float %val
@@ -59,13 +58,21 @@ define float @test_constant_fold_rcp_f32_43() nounwind {
  
  define double @test_constant_fold_rcp_f64_43() nounwind {
  ; CHECK-LABEL: @test_constant_fold_rcp_f64_43(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x3F97D05F417D05F4
  ;
    %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
    ret double %val
  }
  
+define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
+; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #7
+; CHECK-NEXT:    ret float [[VAL]]
+;
+  %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
+  ret float %val
+}
+
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.rsq
  ; --------------------------------------------------------------------
@@ -74,7 +81,7 @@ declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone
  
  define float @test_constant_fold_rsq_f32_undef() nounwind {
  ; CHECK-LABEL: @test_constant_fold_rsq_f32_undef(
-; CHECK-NEXT:    ret float undef
+; CHECK-NEXT:    ret float 0x7FF8000000000000
  ;
    %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone
    ret float %val
@@ -2387,8 +2394,8 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent
  
  define i64 @ballot_nocombine_64(i1 %i) {
  ; CHECK-LABEL: @ballot_nocombine_64(
-; CHECK-NEXT:    %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
-; CHECK-NEXT:    ret i64 %b
+; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
+; CHECK-NEXT:    ret i64 [[B]]
  ;
    %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
    ret i64 %b
@@ -2413,8 +2420,8 @@ define i64 @ballot_one_64() {
  
  define i32 @ballot_nocombine_32(i1 %i) {
  ; CHECK-LABEL: @ballot_nocombine_32(
-; CHECK-NEXT:    %b = call i32 @llvm.amdgcn.ballot.i32(i1 %i)
-; CHECK-NEXT:    ret i32 %b
+; CHECK-NEXT:    [[B:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
+; CHECK-NEXT:    ret i32 [[B]]
  ;
    %b = call i32 @llvm.amdgcn.ballot.i32(i1 %i)
    ret i32 %b
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 22 May 2020 01:04:06 +0000 (21:04 -0400)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Sat, 23 May 2020 17:28:36 +0000 (13:28 -0400)
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp		patch | blob | history
llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll		patch | blob | history