From 1c0681757669880bda144aeb56dcad6901a2016b Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Fri, 28 Oct 2022 21:57:23 +0000 Subject: [PATCH] Revert "[AArch64] Optimize memcmp when the result is tested for [in]equality with 0" This reverts commit 01ff511593d1a4920fa3c1d450ad2077661e0bdc. It triggers an assertion failure in SelectionDAG.cpp see https://github.com/llvm/llvm-project/issues/58675 for details. --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 29 ----- llvm/test/CodeGen/AArch64/atomicrmw-O0.ll | 132 ++++++++++----------- llvm/test/CodeGen/AArch64/bcmp-inline-small.ll | 12 +- llvm/test/CodeGen/AArch64/bcmp.ll | 36 ++++-- llvm/test/CodeGen/AArch64/dag-combine-setcc.ll | 79 ------------ llvm/test/CodeGen/AArch64/i128-cmp.ll | 26 ++-- .../AArch64/umulo-128-legalisation-lowering.ll | 8 +- 7 files changed, 117 insertions(+), 205 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index abf88b4..3194f54 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -19490,35 +19490,6 @@ static SDValue performSETCCCombine(SDNode *N, } } - // Try to express conjunction "cmp 0 (or (xor A0 A1) (xor B0 B1))" as: - // cmp A0, A0; ccmp A0, B1, 0, eq; cmp inv(Cond) flag - if (!DCI.isBeforeLegalize() && VT.isScalarInteger() && - (Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) && - LHS->getOpcode() == ISD::OR && - (LHS.getOperand(0)->getOpcode() == ISD::XOR && - LHS.getOperand(1)->getOpcode() == ISD::XOR) && - LHS.hasOneUse() && LHS.getOperand(0)->hasOneUse() && - LHS.getOperand(1)->hasOneUse()) { - SDValue XOR0 = LHS.getOperand(0); - SDValue XOR1 = LHS.getOperand(1); - SDValue CCVal = DAG.getConstant(AArch64CC::EQ, DL, MVT_CC); - EVT TstVT = LHS->getValueType(0); - SDValue Cmp = - DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(TstVT, MVT::Glue), - XOR0.getOperand(0), XOR0.getOperand(1)); - SDValue Overflow = Cmp.getValue(1); - SDValue NZCVOp = DAG.getConstant(0, DL, MVT::i32); - SDValue CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, XOR1.getOperand(0), - XOR1.getOperand(1), NZCVOp, CCVal, Overflow); - // Invert CSEL's operands. - SDValue TVal = DAG.getConstant(1, DL, VT); - SDValue FVal = DAG.getConstant(0, DL, VT); - AArch64CC::CondCode CC = changeIntCCToAArch64CC(Cond); - AArch64CC::CondCode InvCC = AArch64CC::getInvertedCondCode(CC); - return DAG.getNode(AArch64ISD::CSEL, DL, VT, FVal, TVal, - DAG.getConstant(InvCC, DL, MVT::i32), CCmp); - } - return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll index ec5f8e2..d16c8aa 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll @@ -216,40 +216,38 @@ define i128 @test_rmw_add_128(i128* %dst) { ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB4_2 Depth 2 ; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload -; NOLSE-NEXT: ldr x13, [sp, #32] // 8-byte Folded Reload -; NOLSE-NEXT: ldr x10, [sp, #24] // 8-byte Folded Reload -; NOLSE-NEXT: adds x14, x13, #1 +; NOLSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: adds x14, x8, #1 ; NOLSE-NEXT: cinc x15, x11, hs ; NOLSE-NEXT: .LBB4_2: // %atomicrmw.start ; NOLSE-NEXT: // Parent Loop BB4_1 Depth=1 ; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 -; NOLSE-NEXT: ldaxp x12, x8, [x10] -; NOLSE-NEXT: cmp x12, x13 -; NOLSE-NEXT: cset w9, ne -; NOLSE-NEXT: cmp x8, x11 -; NOLSE-NEXT: cinc w9, w9, ne -; NOLSE-NEXT: cbnz w9, .LBB4_4 +; NOLSE-NEXT: ldaxp x10, x9, [x13] +; NOLSE-NEXT: cmp x10, x8 +; NOLSE-NEXT: cset w12, ne +; NOLSE-NEXT: cmp x9, x11 +; NOLSE-NEXT: cinc w12, w12, ne +; NOLSE-NEXT: cbnz w12, .LBB4_4 ; NOLSE-NEXT: // %bb.3: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2 -; NOLSE-NEXT: stlxp w9, x14, x15, [x10] -; NOLSE-NEXT: cbnz w9, .LBB4_2 +; NOLSE-NEXT: stlxp w12, x14, x15, [x13] +; NOLSE-NEXT: cbnz w12, .LBB4_2 ; NOLSE-NEXT: b .LBB4_5 ; NOLSE-NEXT: .LBB4_4: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2 -; NOLSE-NEXT: stlxp w9, x12, x8, [x10] -; NOLSE-NEXT: cbnz w9, .LBB4_2 +; NOLSE-NEXT: stlxp w12, x10, x9, [x13] +; NOLSE-NEXT: cbnz w12, .LBB4_2 ; NOLSE-NEXT: .LBB4_5: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1 -; NOLSE-NEXT: mov x9, x8 +; NOLSE-NEXT: eor x11, x9, x11 +; NOLSE-NEXT: eor x8, x10, x8 +; NOLSE-NEXT: orr x8, x8, x11 ; NOLSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill -; NOLSE-NEXT: mov x10, x12 ; NOLSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill -; NOLSE-NEXT: subs x12, x12, x13 -; NOLSE-NEXT: ccmp x8, x11, #0, eq -; NOLSE-NEXT: cset w8, ne ; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill ; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill -; NOLSE-NEXT: tbnz w8, #0, .LBB4_1 +; NOLSE-NEXT: cbnz x8, .LBB4_1 ; NOLSE-NEXT: b .LBB4_6 ; NOLSE-NEXT: .LBB4_6: // %atomicrmw.end ; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload @@ -269,26 +267,26 @@ define i128 @test_rmw_add_128(i128* %dst) { ; LSE-NEXT: b .LBB4_1 ; LSE-NEXT: .LBB4_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 -; LSE-NEXT: ldr x8, [sp, #40] // 8-byte Folded Reload -; LSE-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload +; LSE-NEXT: ldr x10, [sp, #40] // 8-byte Folded Reload +; LSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload ; LSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload -; LSE-NEXT: mov x0, x11 -; LSE-NEXT: mov x1, x8 -; LSE-NEXT: adds x2, x11, #1 -; LSE-NEXT: cinc x10, x8, hs +; LSE-NEXT: mov x0, x8 +; LSE-NEXT: mov x1, x10 +; LSE-NEXT: adds x2, x8, #1 +; LSE-NEXT: cinc x11, x10, hs ; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3 -; LSE-NEXT: mov x3, x10 +; LSE-NEXT: mov x3, x11 ; LSE-NEXT: caspal x0, x1, x2, x3, [x9] ; LSE-NEXT: mov x9, x1 ; LSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; LSE-NEXT: eor x11, x9, x10 ; LSE-NEXT: mov x10, x0 ; LSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill -; LSE-NEXT: subs x11, x10, x11 -; LSE-NEXT: ccmp x9, x8, #0, eq -; LSE-NEXT: cset w8, ne +; LSE-NEXT: eor x8, x10, x8 +; LSE-NEXT: orr x8, x8, x11 ; LSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill ; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill -; LSE-NEXT: tbnz w8, #0, .LBB4_1 +; LSE-NEXT: cbnz x8, .LBB4_1 ; LSE-NEXT: b .LBB4_2 ; LSE-NEXT: .LBB4_2: // %atomicrmw.end ; LSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload @@ -608,44 +606,42 @@ define i128 @test_rmw_nand_128(i128* %dst) { ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB9_2 Depth 2 ; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload -; NOLSE-NEXT: ldr x13, [sp, #32] // 8-byte Folded Reload -; NOLSE-NEXT: ldr x10, [sp, #24] // 8-byte Folded Reload -; NOLSE-NEXT: mov w8, w13 -; NOLSE-NEXT: mvn w9, w8 -; NOLSE-NEXT: // implicit-def: $x8 -; NOLSE-NEXT: mov w8, w9 -; NOLSE-NEXT: orr x14, x8, #0xfffffffffffffffe +; NOLSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: mov w9, w8 +; NOLSE-NEXT: mvn w10, w9 +; NOLSE-NEXT: // implicit-def: $x9 +; NOLSE-NEXT: mov w9, w10 +; NOLSE-NEXT: orr x14, x9, #0xfffffffffffffffe ; NOLSE-NEXT: mov x15, #-1 ; NOLSE-NEXT: .LBB9_2: // %atomicrmw.start ; NOLSE-NEXT: // Parent Loop BB9_1 Depth=1 ; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 -; NOLSE-NEXT: ldaxp x12, x8, [x10] -; NOLSE-NEXT: cmp x12, x13 -; NOLSE-NEXT: cset w9, ne -; NOLSE-NEXT: cmp x8, x11 -; NOLSE-NEXT: cinc w9, w9, ne -; NOLSE-NEXT: cbnz w9, .LBB9_4 +; NOLSE-NEXT: ldaxp x10, x9, [x13] +; NOLSE-NEXT: cmp x10, x8 +; NOLSE-NEXT: cset w12, ne +; NOLSE-NEXT: cmp x9, x11 +; NOLSE-NEXT: cinc w12, w12, ne +; NOLSE-NEXT: cbnz w12, .LBB9_4 ; NOLSE-NEXT: // %bb.3: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2 -; NOLSE-NEXT: stlxp w9, x14, x15, [x10] -; NOLSE-NEXT: cbnz w9, .LBB9_2 +; NOLSE-NEXT: stlxp w12, x14, x15, [x13] +; NOLSE-NEXT: cbnz w12, .LBB9_2 ; NOLSE-NEXT: b .LBB9_5 ; NOLSE-NEXT: .LBB9_4: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2 -; NOLSE-NEXT: stlxp w9, x12, x8, [x10] -; NOLSE-NEXT: cbnz w9, .LBB9_2 +; NOLSE-NEXT: stlxp w12, x10, x9, [x13] +; NOLSE-NEXT: cbnz w12, .LBB9_2 ; NOLSE-NEXT: .LBB9_5: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB9_1 Depth=1 -; NOLSE-NEXT: mov x9, x8 +; NOLSE-NEXT: eor x11, x9, x11 +; NOLSE-NEXT: eor x8, x10, x8 +; NOLSE-NEXT: orr x8, x8, x11 ; NOLSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill -; NOLSE-NEXT: mov x10, x12 ; NOLSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill -; NOLSE-NEXT: subs x12, x12, x13 -; NOLSE-NEXT: ccmp x8, x11, #0, eq -; NOLSE-NEXT: cset w8, ne ; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill ; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill -; NOLSE-NEXT: tbnz w8, #0, .LBB9_1 +; NOLSE-NEXT: cbnz x8, .LBB9_1 ; NOLSE-NEXT: b .LBB9_6 ; NOLSE-NEXT: .LBB9_6: // %atomicrmw.end ; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload @@ -665,30 +661,30 @@ define i128 @test_rmw_nand_128(i128* %dst) { ; LSE-NEXT: b .LBB9_1 ; LSE-NEXT: .LBB9_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 -; LSE-NEXT: ldr x8, [sp, #40] // 8-byte Folded Reload -; LSE-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload +; LSE-NEXT: ldr x10, [sp, #40] // 8-byte Folded Reload +; LSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload ; LSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload -; LSE-NEXT: mov x0, x11 -; LSE-NEXT: mov x1, x8 -; LSE-NEXT: mov w10, w11 -; LSE-NEXT: mvn w12, w10 -; LSE-NEXT: // implicit-def: $x10 -; LSE-NEXT: mov w10, w12 -; LSE-NEXT: orr x2, x10, #0xfffffffffffffffe -; LSE-NEXT: mov x10, #-1 +; LSE-NEXT: mov x0, x8 +; LSE-NEXT: mov x1, x10 +; LSE-NEXT: mov w11, w8 +; LSE-NEXT: mvn w12, w11 +; LSE-NEXT: // implicit-def: $x11 +; LSE-NEXT: mov w11, w12 +; LSE-NEXT: orr x2, x11, #0xfffffffffffffffe +; LSE-NEXT: mov x11, #-1 ; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3 -; LSE-NEXT: mov x3, x10 +; LSE-NEXT: mov x3, x11 ; LSE-NEXT: caspal x0, x1, x2, x3, [x9] ; LSE-NEXT: mov x9, x1 ; LSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; LSE-NEXT: eor x11, x9, x10 ; LSE-NEXT: mov x10, x0 ; LSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill -; LSE-NEXT: subs x11, x10, x11 -; LSE-NEXT: ccmp x9, x8, #0, eq -; LSE-NEXT: cset w8, ne +; LSE-NEXT: eor x8, x10, x8 +; LSE-NEXT: orr x8, x8, x11 ; LSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill ; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill -; LSE-NEXT: tbnz w8, #0, .LBB9_1 +; LSE-NEXT: cbnz x8, .LBB9_1 ; LSE-NEXT: b .LBB9_2 ; LSE-NEXT: .LBB9_2: // %atomicrmw.end ; LSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll index 60b0c37..8a2429b 100644 --- a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll +++ b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll @@ -12,8 +12,10 @@ define i1 @test_b2(i8* %s1, i8* %s2) { ; CHECKN-NEXT: ldr x9, [x1] ; CHECKN-NEXT: ldur x10, [x0, #7] ; CHECKN-NEXT: ldur x11, [x1, #7] -; CHECKN-NEXT: cmp x8, x9 -; CHECKN-NEXT: ccmp x10, x11, #0, eq +; CHECKN-NEXT: eor x8, x8, x9 +; CHECKN-NEXT: eor x9, x10, x11 +; CHECKN-NEXT: orr x8, x8, x9 +; CHECKN-NEXT: cmp x8, #0 ; CHECKN-NEXT: cset w0, eq ; CHECKN-NEXT: ret ; @@ -42,8 +44,10 @@ define i1 @test_b2_align8(i8* align 8 %s1, i8* align 8 %s2) { ; CHECKN-NEXT: ldr x9, [x1] ; CHECKN-NEXT: ldur x10, [x0, #7] ; CHECKN-NEXT: ldur x11, [x1, #7] -; CHECKN-NEXT: cmp x8, x9 -; CHECKN-NEXT: ccmp x10, x11, #0, eq +; CHECKN-NEXT: eor x8, x8, x9 +; CHECKN-NEXT: eor x9, x10, x11 +; CHECKN-NEXT: orr x8, x8, x9 +; CHECKN-NEXT: cmp x8, #0 ; CHECKN-NEXT: cset w0, eq ; CHECKN-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/bcmp.ll b/llvm/test/CodeGen/AArch64/bcmp.ll index 510c64e..ff94642 100644 --- a/llvm/test/CodeGen/AArch64/bcmp.ll +++ b/llvm/test/CodeGen/AArch64/bcmp.ll @@ -113,8 +113,10 @@ define i1 @bcmp7(ptr %a, ptr %b) { ; CHECK-NEXT: ldr w9, [x1] ; CHECK-NEXT: ldur w10, [x0, #3] ; CHECK-NEXT: ldur w11, [x1, #3] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: ccmp w10, w11, #0, eq +; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: eor w9, w10, w11 +; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 7) @@ -180,8 +182,10 @@ define i1 @bcmp11(ptr %a, ptr %b) { ; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: ldur x10, [x0, #3] ; CHECK-NEXT: ldur x11, [x1, #3] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: eor x8, x8, x9 +; CHECK-NEXT: eor x9, x10, x11 +; CHECK-NEXT: orr x8, x8, x9 +; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 11) @@ -214,8 +218,10 @@ define i1 @bcmp13(ptr %a, ptr %b) { ; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: ldur x10, [x0, #5] ; CHECK-NEXT: ldur x11, [x1, #5] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: eor x8, x8, x9 +; CHECK-NEXT: eor x9, x10, x11 +; CHECK-NEXT: orr x8, x8, x9 +; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 13) @@ -230,8 +236,10 @@ define i1 @bcmp14(ptr %a, ptr %b) { ; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: ldur x10, [x0, #6] ; CHECK-NEXT: ldur x11, [x1, #6] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: eor x8, x8, x9 +; CHECK-NEXT: eor x9, x10, x11 +; CHECK-NEXT: orr x8, x8, x9 +; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 14) @@ -246,8 +254,10 @@ define i1 @bcmp15(ptr %a, ptr %b) { ; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: ldur x10, [x0, #7] ; CHECK-NEXT: ldur x11, [x1, #7] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: eor x8, x8, x9 +; CHECK-NEXT: eor x9, x10, x11 +; CHECK-NEXT: orr x8, x8, x9 +; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 15) @@ -260,8 +270,10 @@ define i1 @bcmp16(ptr %a, ptr %b) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp x8, x9, [x0] ; CHECK-NEXT: ldp x10, x11, [x1] -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: ccmp x9, x11, #0, eq +; CHECK-NEXT: eor x8, x8, x10 +; CHECK-NEXT: eor x9, x9, x11 +; CHECK-NEXT: orr x8, x8, x9 +; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 16) diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll index 885d4a9..f826a80 100644 --- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll @@ -128,82 +128,3 @@ define i1 @combine_setcc_ne_vecreduce_or_v64i1(<64 x i8> %a) { %cmp2 = icmp ne i64 %cast, zeroinitializer ret i1 %cmp2 } - -define i1 @combine_setcc_eq0_conjunction_xor_or(ptr %a, ptr %b) { -; CHECK-LABEL: combine_setcc_eq0_conjunction_xor_or: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x9, [x0] -; CHECK-NEXT: ldp x10, x11, [x1] -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: ccmp x9, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16) - %cmp = icmp eq i32 %bcmp, 0 - ret i1 %cmp -} - -define i1 @combine_setcc_ne0_conjunction_xor_or(ptr %a, ptr %b) { -; CHECK-LABEL: combine_setcc_ne0_conjunction_xor_or: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x9, [x0] -; CHECK-NEXT: ldp x10, x11, [x1] -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: ccmp x9, x11, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16) - %cmp = icmp ne i32 %bcmp, 0 - ret i1 %cmp -} - -; Doesn't increase the number of instructions, where the LHS has multiple uses -define i32 @combine_setcc_multiuse(i32 %0, i32 %1, i32 %2, i32 %3) { -; CHECK-LABEL: combine_setcc_multiuse: -; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w1, w0 -; CHECK-NEXT: eor w9, w3, w2 -; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: cbz w8, .LBB10_2 -; CHECK-NEXT: // %bb.1: -; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: b use -; CHECK-NEXT: .LBB10_2: -; CHECK-NEXT: ret - %5 = xor i32 %1, %0 - %6 = xor i32 %3, %2 - %7 = or i32 %6, %5 - %8 = icmp eq i32 %7, 0 - br i1 %8, label %11, label %9 - -9: ; preds = %4 - %10 = tail call i32 @use(i32 %7) #2 - br label %11 - -11: ; preds = %4, %9 - %12 = phi i32 [ %10, %9 ], [ %0, %4 ] - ret i32 %12 -} - -; There may be issues with the CMP/CCMP with the scheduling of instructions -; that ISel will create out of the DAG -define i32 @combine_setcc_glue(i128 noundef %x, i128 noundef %y) { -; CHECK-LABEL: combine_setcc_glue: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: ccmp x1, x3, #0, eq -; CHECK-NEXT: ccmp x0, x2, #4, ne -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret -entry: - %cmp3 = icmp eq i128 %x, %y - %conv = trunc i128 %x to i64 - %conv1 = trunc i128 %y to i64 - %cmp = icmp eq i64 %conv, %conv1 - %or7 = or i1 %cmp3, %cmp - %or = zext i1 %or7 to i32 - ret i32 %or -} - -declare i32 @bcmp(ptr nocapture, ptr nocapture, i64) -declare i32 @use(i32 noundef) diff --git a/llvm/test/CodeGen/AArch64/i128-cmp.ll b/llvm/test/CodeGen/AArch64/i128-cmp.ll index b50a559..7cc3e84 100644 --- a/llvm/test/CodeGen/AArch64/i128-cmp.ll +++ b/llvm/test/CodeGen/AArch64/i128-cmp.ll @@ -6,8 +6,10 @@ declare void @call() define i1 @cmp_i128_eq(i128 %a, i128 %b) { ; CHECK-LABEL: cmp_i128_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: ccmp x1, x3, #0, eq +; CHECK-NEXT: eor x8, x1, x3 +; CHECK-NEXT: eor x9, x0, x2 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cmp = icmp eq i128 %a, %b @@ -17,8 +19,10 @@ define i1 @cmp_i128_eq(i128 %a, i128 %b) { define i1 @cmp_i128_ne(i128 %a, i128 %b) { ; CHECK-LABEL: cmp_i128_ne: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: ccmp x1, x3, #0, eq +; CHECK-NEXT: eor x8, x1, x3 +; CHECK-NEXT: eor x9, x0, x2 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %cmp = icmp ne i128 %a, %b @@ -116,9 +120,10 @@ define i1 @cmp_i128_sle(i128 %a, i128 %b) { define void @br_on_cmp_i128_eq(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: br_on_cmp_i128_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: ccmp x1, x3, #0, eq -; CHECK-NEXT: b.ne .LBB10_2 +; CHECK-NEXT: eor x8, x1, x3 +; CHECK-NEXT: eor x9, x0, x2 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbnz x8, .LBB10_2 ; CHECK-NEXT: // %bb.1: // %call ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl call @@ -137,9 +142,10 @@ exit: define void @br_on_cmp_i128_ne(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: br_on_cmp_i128_ne: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: ccmp x1, x3, #0, eq -; CHECK-NEXT: b.eq .LBB11_2 +; CHECK-NEXT: eor x8, x1, x3 +; CHECK-NEXT: eor x9, x0, x2 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbz x8, .LBB11_2 ; CHECK-NEXT: // %bb.1: // %call ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl call diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll index e298748..e955014 100644 --- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -68,10 +68,12 @@ define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align ; AARCH-NEXT: adds x11, x12, x11 ; AARCH-NEXT: adc x12, x13, x14 ; AARCH-NEXT: adds x10, x11, x10 -; AARCH-NEXT: asr x11, x1, #63 ; AARCH-NEXT: adc x9, x12, x9 -; AARCH-NEXT: cmp x10, x11 -; AARCH-NEXT: ccmp x9, x11, #0, eq +; AARCH-NEXT: asr x11, x1, #63 +; AARCH-NEXT: eor x9, x9, x11 +; AARCH-NEXT: eor x10, x10, x11 +; AARCH-NEXT: orr x9, x10, x9 +; AARCH-NEXT: cmp x9, #0 ; AARCH-NEXT: cset w9, ne ; AARCH-NEXT: tbz x8, #63, .LBB1_2 ; AARCH-NEXT: // %bb.1: // %Entry -- 2.7.4