From 34d18fd241abafdf0436cbceebdeff2ecf685ae2 Mon Sep 17 00:00:00 2001
From: Mingming Liu
Date: Sat, 15 Oct 2022 11:50:56 -0700
Subject: [PATCH] [AArch64] Enhance bit-field-positioning op matcher to see
 through 'any_extend' for pattern
 'and(any_extend(shl(val, N)), shifted-mask)'

Before this patch (and the refactoring patch D135843), isBitfieldPositioningOp
did not handle "and(any_extend(shl(val, N)), shifted-mask)"; it bailed out
whenever the operand of the AND was not a SHL.

After this patch, isBitfieldPositioningOp looks through the "any_extend" to
find the "shl" when searching for possible bit-field-positioning nodes.

https://gcc.godbolt.org/z/3ncGKbGW6 is a four-line LLVM IR example that can now
be optimized to UBFIZ (see the added test case test_and_extended_shift_with_imm
in llvm/test/CodeGen/AArch64/bitfield-insert.ll). One existing test case also
improves.
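A minimal sketch of the pattern being targeted (mirroring the added test case;
the function and value names here are only illustrative):

  define i64 @shift_then_mask(i64 %val) {
    %shifted = shl i64 %val, 7
    ; 32640 == 0x7f80 == 0xff << 7, i.e. an 8-bit field positioned at bit 7.
    %masked = and i64 %shifted, 32640
    ret i64 %masked
  }

With this patch that shl+and pair lowers to a single "ubfiz x0, x0, #7, #8"
(see the CHECK lines of the new test case below).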
Differential Revision: https://reviews.llvm.org/D135852
---
 llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 45 ++++++++++++++++++++++---
 llvm/test/CodeGen/AArch64/bitfield-insert.ll    | 16 +++++++--
 2 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 629b240..844f9c0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2580,17 +2580,54 @@ static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
   SDValue AndOp0 = Op.getOperand(0);
 
   uint64_t ShlImm;
-  if (!isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm))
+  SDValue ShlOp0;
+  if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
+    // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
+    ShlOp0 = AndOp0.getOperand(0);
+  } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
+             isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
+                                   ShlImm)) {
+    // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
+
+    // ShlVal == shl(val, N), which is a left shift on a smaller type.
+    SDValue ShlVal = AndOp0.getOperand(0);
+
+    // Since this is after type legalization and ShlVal is extended to MVT::i64,
+    // the value type of ShlVal is expected to be MVT::i32.
+    assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
+
+    // Widen 'val' to MVT::i64 as the source of the bit-field positioning.
+    ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
+  } else
     return false;
 
-  // Bail out if the SHL has more than one use, since then we'll end up
-  // generating SHL+UBFIZ instead of just keeping SHL+AND.
+  // For !BiggerPattern, bail out if AndOp0 has more than one use, since we
+  // would otherwise end up generating AndOp0+UBFIZ instead of just keeping
+  // AndOp0+AND.
   if (!BiggerPattern && !AndOp0.hasOneUse())
     return false;
 
   DstLSB = countTrailingZeros(NonZeroBits);
   Width = countTrailingOnes(NonZeroBits >> DstLSB);
 
+  // Bail out on large Width. This happens when no proper combining / constant
+  // folding was performed.
+  if (Width >= (int)VT.getSizeInBits()) {
+    // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
+    // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
+    // "val".
+    // If VT is i32, Width >= 32 means:
+    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
+    //   demands at least 'Width' bits (after dag-combiner). This together with
+    //   the `any_extend` Op (undefined higher bits) indicates a missed combination
+    //   when lowering the 'and' IR instruction to a machine IR instruction.
+    LLVM_DEBUG(
+        dbgs()
+        << "Found large Width in bit-field-positioning -- this indicates no "
+           "proper combining / constant folding was performed\n");
+    return false;
+  }
+
   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
   // amount. BiggerPattern is true when this pattern is being matched for BFI,
@@ -2599,7 +2636,7 @@ static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
   if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
     return false;
 
-  Src = getLeftShift(CurDAG, AndOp0.getOperand(0), ShlImm - DstLSB);
+  Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index 5cc7143..a27e293 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -580,9 +580,8 @@ define <2 x i32> @test_complex_type(<2 x i32>* %addr, i64 %in, i64* %bf ) {
 define i64 @test_truncated_shift(i64 %x, i64 %y) {
 ; CHECK-LABEL: test_truncated_shift:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    lsl w8, w1, #25
-; CHECK-NEXT:    lsr x8, x8, #25
-; CHECK-NEXT:    bfi x0, x8, #25, #5
+; CHECK-NEXT:    // kill: def $w1 killed $w1 killed $x1 def $x1
+; CHECK-NEXT:    bfi x0, x1, #25, #5
 ; CHECK-NEXT:    ret
 entry:
   %and = and i64 %x, -1040187393
@@ -591,3 +590,14 @@ entry:
   %or = or i64 %and5, %and
   ret i64 %or
 }
+
+define i64 @test_and_extended_shift_with_imm(i64 %0) {
+; CHECK-LABEL: test_and_extended_shift_with_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0 def $x0
+; CHECK-NEXT:    ubfiz x0, x0, #7, #8
+; CHECK-NEXT:    ret
+  %2 = shl i64 %0, 7
+  %3 = and i64 %2, 32640 ; #0x7f80
+  ret i64 %3
+}
-- 
2.7.4