From: Qiu Chaofan Date: Tue, 1 Sep 2020 16:29:12 +0000 (+0800) Subject: [PowerPC] Handle STRICT_FSETCC(S) in more cases X-Git-Tag: llvmorg-13-init~13189 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=29ae4485950ed76faa94dabbd13bbe91d2b5c750;p=platform%2Fupstream%2Fllvm.git [PowerPC] Handle STRICT_FSETCC(S) in more cases On -O0, i1 strict_fsetcc will be promoted to i32. We don't handle that in TD patterns. This patch fills logic in PPCISelDAGToDAG to handle more cases. Reviewed By: uweigand Differential Revision: https://reviews.llvm.org/D86595 --- diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 51ff0da..62bb5cc 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -218,7 +218,7 @@ namespace { /// SelectCC - Select a comparison of the specified values with the /// specified condition code, returning the CR# of the expression. SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, - const SDLoc &dl); + const SDLoc &dl, SDValue Chain = SDValue()); /// SelectAddrImmOffs - Return true if the operand is valid for a preinc /// immediate field. Note that the operand at this point is already the @@ -3710,7 +3710,7 @@ bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { /// SelectCC - Select a comparison of the specified values with the specified /// condition code, returning the CR# of the expression. SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, - const SDLoc &dl) { + const SDLoc &dl, SDValue Chain) { // Always select the LHS. 
unsigned Opc; @@ -3863,7 +3863,12 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, assert(Subtarget->hasVSX() && "__float128 requires VSX"); Opc = PPC::XSCMPUQP; } - return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); + if (Chain) + return SDValue( + CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain), + 0); + else + return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, @@ -4050,17 +4055,23 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool PPCDAGToDAGISel::trySETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; - ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); + bool IsStrict = N->isStrictFPOpcode(); + ISD::CondCode CC = + cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + + SDValue LHS = N->getOperand(IsStrict ? 1 : 0); + SDValue RHS = N->getOperand(IsStrict ? 2 : 1); - if (!Subtarget->useCRBits() && isInt32Immediate(N->getOperand(1), Imm)) { + if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. 
// setcc op, 0 if (Imm == 0) { - SDValue Op = N->getOperand(0); + SDValue Op = LHS; switch (CC) { default: break; case ISD::SETEQ: { @@ -4095,7 +4106,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { } } } else if (Imm == ~0U) { // setcc op, -1 - SDValue Op = N->getOperand(0); + SDValue Op = LHS; switch (CC) { default: break; case ISD::SETEQ: @@ -4138,12 +4149,9 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { } } - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - // Altivec Vector compare instructions do not set any CR register by default and // vector compare operations return the same type as the operands. - if (LHS.getValueType().isVector()) { + if (!IsStrict && LHS.getValueType().isVector()) { if (Subtarget->hasSPE()) return false; @@ -4171,7 +4179,9 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); - SDValue CCReg = SelectCC(LHS, RHS, CC, dl); + SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain); + if (IsStrict) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1)); SDValue IntCR; // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that @@ -4664,6 +4674,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { break; case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: if (trySETCC(N)) return; break; diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll new file mode 100644 index 0000000..e6bc0f4 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll @@ -0,0 +1,140 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr9 -O0 | FileCheck %s + +define i32 @une_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 { +; CHECK-LABEL: une_ppcf128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpudp cr7, f1, f3 +; CHECK-NEXT: mfocrf r3, 1 +; 
CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: xscmpudp cr7, f2, f4 +; CHECK-NEXT: mfocrf r4, 1 +; CHECK-NEXT: rlwinm r4, r4, 31, 31, 31 +; CHECK-NEXT: xori r4, r4, 1 +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: xscmpudp cr7, f1, f3 +; CHECK-NEXT: mfocrf r4, 1 +; CHECK-NEXT: rlwinm r4, r4, 31, 31, 31 +; CHECK-NEXT: xori r4, r4, 1 +; CHECK-NEXT: xscmpudp cr7, f1, f3 +; CHECK-NEXT: mfocrf r5, 1 +; CHECK-NEXT: rlwinm r5, r5, 31, 31, 31 +; CHECK-NEXT: xori r5, r5, 1 +; CHECK-NEXT: and r4, r4, r5 +; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: # kill: def $r4 killed $r3 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +entry: + %0 = call i1 @llvm.experimental.constrained.fcmp.ppcf128(ppc_fp128 %a, ppc_fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0 + %1 = zext i1 %0 to i32 + ret i32 %1 +} + +; This is a different branch from une +define i32 @ogt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 { +; CHECK-LABEL: ogt_ppcf128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpudp cr7, f1, f3 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: xscmpudp cr7, f2, f4 +; CHECK-NEXT: mfocrf r4, 1 +; CHECK-NEXT: rlwinm r4, r4, 30, 31, 31 +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: xscmpudp cr7, f1, f3 +; CHECK-NEXT: xscmpudp cr0, f1, f3 +; CHECK-NEXT: mfocrf r4, 1 +; CHECK-NEXT: rotlwi r4, r4, 28 +; CHECK-NEXT: stw r4, -4(r1) +; CHECK-NEXT: mcrf cr7, cr0 +; CHECK-NEXT: mfocrf r4, 1 +; CHECK-NEXT: rlwinm r4, r4, 30, 31, 31 +; CHECK-NEXT: lwz r5, -4(r1) +; CHECK-NEXT: rotlwi r5, r5, 4 +; CHECK-NEXT: mtocrf 1, r5 +; CHECK-NEXT: mfocrf r5, 1 +; CHECK-NEXT: rlwinm r5, r5, 31, 31, 31 +; CHECK-NEXT: xori r5, r5, 1 +; CHECK-NEXT: and r4, r5, r4 +; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: # kill: def $r4 killed $r3 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +entry: + %0 = call i1 @llvm.experimental.constrained.fcmp.ppcf128(ppc_fp128 %a, ppc_fp128 %b, metadata !"ogt", metadata !"fpexcept.strict") #0 + %1 = zext i1 %0 to i32 + ret i32 %1 +} + 
+define i1 @test_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: test_f128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpuqp cr7, v2, v3 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: # implicit-def: $x4 +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +entry: + %0 = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0 + ret i1 %0 +} + +define i1 @testbr_f64(double %a, double %b) #0 { +; CHECK-LABEL: testbr_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpudp cr7, f1, f2 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: bne cr0, .LBB3_2 +; CHECK-NEXT: b .LBB3_1 +; CHECK-NEXT: .LBB3_1: # %tr +; CHECK-NEXT: li r3, -1 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB3_2: # %fl +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: blr +entry: + %0 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") #0 + br i1 %0, label %tr, label %fl +tr: + ret i1 true +fl: + ret i1 false +} + +define i1 @testbr_f32(float %a, float %b) #0 { +; CHECK-LABEL: testbr_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu cr7, f1, f2 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: bne cr0, .LBB4_2 +; CHECK-NEXT: b .LBB4_1 +; CHECK-NEXT: .LBB4_1: # %tr +; CHECK-NEXT: li r3, -1 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB4_2: # %fl +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: blr +entry: + %0 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") #0 + br i1 %0, label %tr, label %fl +tr: + ret i1 true +fl: + ret i1 false +} + +declare i1 @llvm.experimental.constrained.fcmp.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata) +declare i1 
@llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) + +attributes #0 = { strictfp }