From 733f4ed1bb42d2080fd24dd5c4155e7277abafc4 Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma@codeaurora.org>
Date: Wed, 25 Jul 2018 18:22:22 +0000
Subject: [PATCH] [ARM] Prefer lsls+lsrs over lsls+ands or lsrs+ands in
 Thumb1.

Saves materializing the immediate for the "ands".

Corresponding patterns exist for lsrs+lsls, but that seems less common
in practice.

Now implemented as a DAGCombine.
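
For illustration (a sketch derived from the test1/test2 checks below;
register allocation and the constant-pool label are approximate),
"(x >> 2) & 1023" previously had to keep the mask in a register,
roughly:

    lsrs r0, r0, #2
    ldr  r1, .LCPI0_0   @ .long 1023
    ands r0, r1

and now becomes a pair of shifts:

    lsls r0, r0, #20    @ move the ten wanted bits to the top,
    lsrs r0, r0, #22    @ then back down, clearing the rest

The combine relies on two shift identities. A standalone sanity check
(illustrative only, not part of this patch):

    // Verifies the identities behind the two CombineANDShift patterns
    // for sample values of x and all valid shift-amount pairs.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Xs[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu};
      for (uint32_t C2 = 1; C2 < 32; ++C2) {
        for (uint32_t C3 = 1; C3 < 32; ++C3) {
          for (uint32_t X : Xs) {
            // First pattern: c1 is a mask with c3 leading zeros, c2 < c3,
            // so (x >> c2) & c1 == (x << (c3 - c2)) >> c3.
            if (C2 < C3) {
              uint32_t C1 = 0xFFFFFFFFu >> C3;
              assert(((X >> C2) & C1) == ((X << (C3 - C2)) >> C3));
            }
            // Second pattern: c1 is a shifted mask with c3 leading and c2
            // trailing zeros, so (x << c2) & c1 == (x << (c2 + c3)) >> c3.
            if (C2 + C3 < 32) {
              uint32_t C1 = (0xFFFFFFFFu << (C2 + C3)) >> C3;
              assert(((X << C2) & C1) == ((X << (C2 + C3)) >> C3));
            }
          }
        }
      }
      return 0;
    }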

Differential Revision: https://reviews.llvm.org/D49585

llvm-svn: 337945
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp |  81 +++++++++++++++++++++
 llvm/test/CodeGen/Thumb/shift-and.ll    | 125 ++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+)
 create mode 100644 llvm/test/CodeGen/Thumb/shift-and.ll

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 2e0f609..47222a6 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -10676,6 +10676,83 @@ static SDValue PerformMULCombine(SDNode *N,
   return SDValue();
 }
 
+static SDValue CombineANDShift(SDNode *N,
+                               TargetLowering::DAGCombinerInfo &DCI,
+                               const ARMSubtarget *Subtarget) {
+  // Allow DAGCombine to pattern-match before we touch the canonical form.
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
+
+  if (N->getValueType(0) != MVT::i32)
+    return SDValue();
+
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!N1C)
+    return SDValue();
+
+  uint32_t C1 = (uint32_t)N1C->getZExtValue();
+  // Don't transform uxtb/uxth.
+  if (C1 == 255 || C1 == 65535)
+    return SDValue();
+
+  SDNode *N0 = N->getOperand(0).getNode();
+  if (!N0->hasOneUse())
+    return SDValue();
+
+  if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
+    return SDValue();
+
+  bool LeftShift = N0->getOpcode() == ISD::SHL;
+
+  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+  if (!N01C)
+    return SDValue();
+
+  uint32_t C2 = (uint32_t)N01C->getZExtValue();
+  if (!C2 || C2 >= 32)
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+
+  // We have a pattern of the form "(and (shl x, c2) c1)" or
+  // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
+  // transform to a pair of shifts, to save materializing c1.
+
+  // First pattern: right shift, and c1+1 is a power of two.
+  // FIXME: Also check reversed pattern (left shift, and ~c1+1 is a power
+  // of two).
+  // FIXME: Use demanded bits?
+  if (!LeftShift && isMask_32(C1)) {
+    uint32_t C3 = countLeadingZeros(C1);
+    if (C2 < C3) {
+      SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
+                                DAG.getConstant(C3 - C2, DL, MVT::i32));
+      return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
+                         DAG.getConstant(C3, DL, MVT::i32));
+    }
+  }
+
+  // Second pattern: left shift, and (c1>>c2)+1 is a power of two.
+  // FIXME: Also check reversed pattern (right shift, and ~(c1<<c2)+1
+  // is a power of two).
+  // FIXME: Use demanded bits?
+  if (LeftShift && isShiftedMask_32(C1)) {
+    uint32_t C3 = countLeadingZeros(C1);
+    if (C2 + C3 < 32 && C1 == ((-1U << (C2 + C3)) >> C3)) {
+      SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
+                                DAG.getConstant(C2 + C3, DL, MVT::i32));
+      return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
+                         DAG.getConstant(C3, DL, MVT::i32));
+    }
+  }
+
+  // FIXME: Transform "(and (shl x, c2) c1)" ->
+  // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
+  // c1.
+  return SDValue();
+}
+
 static SDValue PerformANDCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {
@@ -10717,6 +10794,10 @@ static SDValue PerformANDCombine(SDNode *N,
       return Result;
   }
 
+  if (Subtarget->isThumb1Only())
+    if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
+      return Result;
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/Thumb/shift-and.ll b/llvm/test/CodeGen/Thumb/shift-and.ll
new file mode 100644
index 0000000..fa6ee1d
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb/shift-and.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s
+
+define i32 @test1(i32 %x) {
+; CHECK-LABEL: test1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsls r0, r0, #20
+; CHECK-NEXT:    lsrs r0, r0, #22
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i32 %x, 2
+  %shr = and i32 %0, 1023
+  ret i32 %shr
+}
+
+define i32 @test2(i32 %x) {
+; CHECK-LABEL: test2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsrs r1, r0, #2
+; CHECK-NEXT:    ldr r0, .LCPI1_0
+; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI1_0:
+; CHECK-NEXT:    .long 1022 @ 0x3fe
+entry:
+  %0 = lshr i32 %x, 2
+  %shr = and i32 %0, 1022
+  ret i32 %shr
+}
+
+define i32 @test3(i32 %x) {
+; CHECK-LABEL: test3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsrs r0, r0, #2
+; CHECK-NEXT:    uxtb r0, r0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i32 %x, 2
+  %shr = and i32 %0, 255
+  ret i32 %shr
+}
+
+define i32 @test4(i32 %x) {
+; CHECK-LABEL: test4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsls r0, r0, #4
+; CHECK-NEXT:    movs r1, #127
+; CHECK-NEXT:    bics r0, r1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl i32 %x, 4
+  %shr = and i32 %0, -128
+  ret i32 %shr
+}
+
+define i32 @test5(i32 %x) {
+; CHECK-LABEL: test5:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsls r0, r0, #31
+; CHECK-NEXT:    lsrs r0, r0, #2
+; CHECK-NEXT:    bx lr
entry:
+  %0 = shl i32 %x, 29
+  %shr = and i32 %0, 536870912
+  ret i32 %shr
+}
+
+define i32 @test6(i32 %x) {
+; CHECK-LABEL: test6:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    movs r1, #5
+; CHECK-NEXT:    lsls r1, r1, #29
+; CHECK-NEXT:    lsls r0, r0, #29
+; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl i32 %x, 29
+  %shr = and i32 %0, 2684354560
+  ret i32 %shr
+}
+
+define i32 @test7(i32 %x) {
+; CHECK-LABEL: test7:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsrs r1, r0, #29
+; CHECK-NEXT:    movs r0, #4
+; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i32 %x, 29
+  %shr = and i32 %0, 4
+  ret i32 %shr
+}
+
+define i32 @test8(i32 %x) {
+; CHECK-LABEL: test8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsrs r1, r0, #29
+; CHECK-NEXT:    movs r0, #5
+; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i32 %x, 29
+  %shr = and i32 %0, 5
+  ret i32 %shr
+}
+
+define i32 @test9(i32 %x) {
+; CHECK-LABEL: test9:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsrs r1, r0, #2
+; CHECK-NEXT:    ldr r0, .LCPI8_0
+; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI8_0:
+; CHECK-NEXT:    .long 1073741822 @ 0x3ffffffe
+entry:
+  %and = lshr i32 %x, 2
+  %shr = and i32 %and, 1073741822
+  ret i32 %shr
+}
-- 
2.7.4