[PowerPC] Allow constrained FP intrinsics in mightUseCTR

author Qiu Chaofan <qiucofan@cn.ibm.com>

Mon, 24 Aug 2020 03:09:58 +0000 (11:09 +0800)

committer Qiu Chaofan <qiucofan@cn.ibm.com>

Mon, 24 Aug 2020 03:09:58 +0000 (11:09 +0800)
author Qiu Chaofan <qiucofan@cn.ibm.com>
Mon, 24 Aug 2020 03:09:58 +0000 (11:09 +0800)
committer Qiu Chaofan <qiucofan@cn.ibm.com>
Mon, 24 Aug 2020 03:09:58 +0000 (11:09 +0800)
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

index 8434fde..f352971 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -406,6 +406,30 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
            case Intrinsic::loop_decrement:
              return true;
  
+          // Binary operations on 128-bit value will use CTR.
+          case Intrinsic::experimental_constrained_fadd:
+          case Intrinsic::experimental_constrained_fsub:
+          case Intrinsic::experimental_constrained_fmul:
+          case Intrinsic::experimental_constrained_fdiv:
+          case Intrinsic::experimental_constrained_frem:
+            if (F->getType()->getScalarType()->isFP128Ty() ||
+                F->getType()->getScalarType()->isPPC_FP128Ty())
+              return true;
+            break;
+
+          case Intrinsic::experimental_constrained_fptosi:
+          case Intrinsic::experimental_constrained_fptoui:
+          case Intrinsic::experimental_constrained_sitofp:
+          case Intrinsic::experimental_constrained_uitofp: {
+            Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
+            Type *DstType = CI->getType()->getScalarType();
+            if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
+                isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
+                isLargeIntegerTy(!TM.isPPC64(), DstType))
+              return true;
+            break;
+          }
+
            // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
            // because, although it does clobber the counter register, the
            // control can't then return to inside the loop unless there is also
@@ -424,6 +448,15 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
            case Intrinsic::pow:
            case Intrinsic::sin:
            case Intrinsic::cos:
+          case Intrinsic::experimental_constrained_powi:
+          case Intrinsic::experimental_constrained_log:
+          case Intrinsic::experimental_constrained_log2:
+          case Intrinsic::experimental_constrained_log10:
+          case Intrinsic::experimental_constrained_exp:
+          case Intrinsic::experimental_constrained_exp2:
+          case Intrinsic::experimental_constrained_pow:
+          case Intrinsic::experimental_constrained_sin:
+          case Intrinsic::experimental_constrained_cos:
              return true;
            case Intrinsic::copysign:
              if (CI->getArgOperand(0)->getType()->getScalarType()->
@@ -445,6 +478,54 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
            case Intrinsic::llround:            Opcode = ISD::LLROUND;    break;
            case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
            case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
+          case Intrinsic::experimental_constrained_fcmp:
+            Opcode = ISD::STRICT_FSETCC;
+            break;
+          case Intrinsic::experimental_constrained_fcmps:
+            Opcode = ISD::STRICT_FSETCCS;
+            break;
+          case Intrinsic::experimental_constrained_fma:
+            Opcode = ISD::STRICT_FMA;
+            break;
+          case Intrinsic::experimental_constrained_sqrt:
+            Opcode = ISD::STRICT_FSQRT;
+            break;
+          case Intrinsic::experimental_constrained_floor:
+            Opcode = ISD::STRICT_FFLOOR;
+            break;
+          case Intrinsic::experimental_constrained_ceil:
+            Opcode = ISD::STRICT_FCEIL;
+            break;
+          case Intrinsic::experimental_constrained_trunc:
+            Opcode = ISD::STRICT_FTRUNC;
+            break;
+          case Intrinsic::experimental_constrained_rint:
+            Opcode = ISD::STRICT_FRINT;
+            break;
+          case Intrinsic::experimental_constrained_lrint:
+            Opcode = ISD::STRICT_LRINT;
+            break;
+          case Intrinsic::experimental_constrained_llrint:
+            Opcode = ISD::STRICT_LLRINT;
+            break;
+          case Intrinsic::experimental_constrained_nearbyint:
+            Opcode = ISD::STRICT_FNEARBYINT;
+            break;
+          case Intrinsic::experimental_constrained_round:
+            Opcode = ISD::STRICT_FROUND;
+            break;
+          case Intrinsic::experimental_constrained_lround:
+            Opcode = ISD::STRICT_LROUND;
+            break;
+          case Intrinsic::experimental_constrained_llround:
+            Opcode = ISD::STRICT_LLROUND;
+            break;
+          case Intrinsic::experimental_constrained_minnum:
+            Opcode = ISD::STRICT_FMINNUM;
+            break;
+          case Intrinsic::experimental_constrained_maxnum:
+            Opcode = ISD::STRICT_FMAXNUM;
+            break;
            case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
            case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
            }
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll

new file mode 100644 (file)

index 0000000..8e4dd35
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple powerpc64le < %s | FileCheck %s
+
+; Check that a constrained op lowered to a library call (bl cos) keeps the loop from using CTR
+define void @test(double* %cast) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %root
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r29, -24
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    li 30, 0
+; CHECK-NEXT:    addi 29, 3, -8
+; CHECK-NEXT:    .p2align 5
+; CHECK-NEXT:  .LBB0_1: # %for.body
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lfdu 1, 8(29)
+; CHECK-NEXT:    bl cos
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi 30, 30, 8
+; CHECK-NEXT:    stfdx 1, 0, 29
+; CHECK-NEXT:    cmpldi 30, 2040
+; CHECK-NEXT:    bne 0, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %exit
+; CHECK-NEXT:    addi 1, 1, 64
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+root:
+  br label %for.body
+
+exit:
+  ret void
+
+for.body:
+  %i = phi i64 [ 0, %root ], [ %next, %for.body ]
+  %idx = getelementptr inbounds double, double* %cast, i64 %i
+  %val = load double, double* %idx
+  %cos = tail call nnan ninf nsz arcp double @llvm.experimental.constrained.cos.f64(double %val, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  store double %cos, double* %idx, align 8
+  %next = add nuw nsw i64 %i, 1
+  %cond = icmp eq i64 %next, 255
+  br i1 %cond, label %exit, label %for.body
+}
+
+; Check that a constrained op lowered to a native instruction (xssqrtdp) still allows a CTR loop (mtctr/bdnz)
+define void @test2(double* %cast) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 4, 255
+; CHECK-NEXT:    addi 3, 3, -8
+; CHECK-NEXT:    mtctr 4
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB1_1: # %for.body
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lfdu 0, 8(3)
+; CHECK-NEXT:    xssqrtdp 0, 0
+; CHECK-NEXT:    stfdx 0, 0, 3
+; CHECK-NEXT:    bdnz .LBB1_1
+; CHECK-NEXT:  # %bb.2: # %exit
+; CHECK-NEXT:    blr
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %next, %for.body ]
+  %idx = getelementptr inbounds double, double* %cast, i64 %i
+  %val = load double, double* %idx
+  %cos = tail call nnan ninf nsz arcp double @llvm.experimental.constrained.sqrt.f64(double %val, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  store double %cos, double* %idx, align 8
+  %next = add nuw nsw i64 %i, 1
+  %cond = icmp eq i64 %next, 255
+  br i1 %cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
author	Qiu Chaofan <qiucofan@cn.ibm.com>
	Mon, 24 Aug 2020 03:09:58 +0000 (11:09 +0800)
committer	Qiu Chaofan <qiucofan@cn.ibm.com>
	Mon, 24 Aug 2020 03:09:58 +0000 (11:09 +0800)
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp		patch | blob | history
llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll	[new file with mode: 0644]	patch | blob