From 4ece4cd77e6648db518bfc4e8cc1fa8ba97ed33f Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 19 Dec 2021 21:53:50 +0000 Subject: [PATCH] [ARM] Fold away CMP/CSINC from CMOV This makes use of the code in D114013 to fold away unnecessary CMPZ/CSINC starting from a CMOV, in a similar way to how we fold away CSINV/CSINC/etc Differential Revision: https://reviews.llvm.org/D115185 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 17 +++++++++++++++++ llvm/test/CodeGen/Thumb2/mve-pred-or.ll | 24 +++++++++--------------- llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll | 8 +++----- llvm/test/CodeGen/Thumb2/mve-pred-xor.ll | 24 +++++++++--------------- llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll | 14 +++++--------- 5 files changed, 43 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 0d8f265..e38938e 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -18027,6 +18027,23 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (!VT.isInteger()) return SDValue(); + // Fold away an unnecessary CMPZ/CMOV + // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) -> + // if C1==EQ -> CMOV A, B, C2, $cpsr, D + // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D + if (N->getConstantOperandVal(2) == ARMCC::EQ || + N->getConstantOperandVal(2) == ARMCC::NE) { + ARMCC::CondCodes Cond; + if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) { + if (N->getConstantOperandVal(2) == ARMCC::NE) + Cond = ARMCC::getOppositeCondition(Cond); + return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0), + N->getOperand(1), + DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32), + N->getOperand(3), C); + } + } + // Materialize a boolean comparison for integers so we can avoid branching. 
if (isNullConstant(FalseVal)) { if (CC == ARMCC::EQ && isOneConstant(TrueVal)) { diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll index 2310e38..cb3f554 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll @@ -380,24 +380,22 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: orrs r0, r1 ; CHECK-NEXT: vmov r1, r2, d2 -; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r1, r2 ; CHECK-NEXT: cset r1, eq ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #1 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq r1, #1 ; CHECK-NEXT: rsbs r0, r1, #0 ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r2, d1 ; CHECK-NEXT: orrs r0, r2 ; CHECK-NEXT: vmov r2, r3, d3 -; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r2, r3 ; CHECK-NEXT: cset r2, eq ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r2, #1 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq r2, #1 ; CHECK-NEXT: rsbs r0, r2, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 @@ -422,14 +420,12 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i6 ; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r1, r2 -; CHECK-NEXT: vmov r12, r2, d5 -; CHECK-NEXT: cset r1, eq -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq r0, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 +; CHECK-NEXT: vmov r12, r2, d5 ; CHECK-NEXT: vmov r3, r0, d3 ; CHECK-NEXT: eors r0, r2 ; CHECK-NEXT: eor.w r2, r3, r12 @@ -437,10 +433,8 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i6 ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: cset r2, eq -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: 
movne r0, #1 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq r0, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll index b53b1d9..747021e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll @@ -73,28 +73,26 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i ; CHECK-NEXT: vmov r0, r1, d4 ; CHECK-NEXT: orrs r0, r1 ; CHECK-NEXT: vmov r1, r2, d0 -; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r1, r2 ; CHECK-NEXT: vmov r2, r3, d2 ; CHECK-NEXT: cset r1, eq ; CHECK-NEXT: orrs r2, r3 ; CHECK-NEXT: cset r2, eq ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r0, r1, r2, ne +; CHECK-NEXT: csel r0, r1, r2, eq ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r2, d5 -; CHECK-NEXT: orrs r0, r2 +; CHECK-NEXT: orr.w r12, r0, r2 ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: cset r12, eq ; CHECK-NEXT: orrs r2, r3 ; CHECK-NEXT: vmov r3, r0, d3 ; CHECK-NEXT: cset r2, eq ; CHECK-NEXT: orrs r0, r3 ; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r2, r0, ne +; CHECK-NEXT: csel r0, r2, r0, eq ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll index 8741b2b..e5fef33 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll @@ -460,24 +460,22 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: orrs r0, r1 ; CHECK-NEXT: vmov r1, r2, d2 -; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r1, r2 ; CHECK-NEXT: cset r1, eq ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: eorne r1, r1, #1 +; CHECK-NEXT: it eq +; CHECK-NEXT: eoreq r1, 
r1, #1 ; CHECK-NEXT: rsbs r0, r1, #0 ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r2, d1 ; CHECK-NEXT: orrs r0, r2 ; CHECK-NEXT: vmov r2, r3, d3 -; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r2, r3 ; CHECK-NEXT: cset r2, eq ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: eorne r2, r2, #1 +; CHECK-NEXT: it eq +; CHECK-NEXT: eoreq r2, r2, #1 ; CHECK-NEXT: rsbs r0, r2, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 @@ -502,14 +500,12 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i6 ; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r1, r2 -; CHECK-NEXT: vmov r12, r2, d5 -; CHECK-NEXT: cset r1, eq -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: eorne r0, r0, #1 +; CHECK-NEXT: it eq +; CHECK-NEXT: eoreq r0, r0, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 +; CHECK-NEXT: vmov r12, r2, d5 ; CHECK-NEXT: vmov r3, r0, d3 ; CHECK-NEXT: eors r0, r2 ; CHECK-NEXT: eor.w r2, r3, r12 @@ -517,10 +513,8 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i6 ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: cset r2, eq -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: eorne r0, r0, #1 +; CHECK-NEXT: it eq +; CHECK-NEXT: eoreq r0, r0, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll index 5dd8d2d..10ecdc0 100644 --- a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll +++ b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll @@ -47,12 +47,10 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, mi -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it 
ne -; CHECK-NEXT: eorne r1, r1, #1 -; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: eormi r1, r1, #1 ; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: vmov r1, r3, d3 ; CHECK-NEXT: adds r1, r1, r0 @@ -66,10 +64,8 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK-NEXT: vmov q0[3], q0[1], lr, r5 ; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: cset r3, mi -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: eorne r0, r0, #1 +; CHECK-NEXT: it mi +; CHECK-NEXT: eormi r0, r0, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: asrs r0, r5, #31 -- 2.7.4