From 4d86bed0bb2d109f9aa802cc09377900b84c09bb Mon Sep 17 00:00:00 2001 From: James Molloy Date: Fri, 9 Sep 2016 12:52:24 +0000 Subject: [PATCH] [Thumb] Select (CMPZ X, -C) -> (CMPZ (ADDS X, C), 0) The CMPZ #0 disappears during peepholing, leaving just a tADDi3, tADDi8 or t2ADDri. This avoids having to materialize the expensive negative constant in Thumb-1, and allows a shrinking from a 32-bit CMN to a 16-bit ADDS in Thumb-2. llvm-svn: 281040 --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 42 ++++++++++++++++++++++++++++++ llvm/test/CodeGen/ARM/lsr-icmp-imm.ll | 7 ++--- llvm/test/CodeGen/ARM/select_xform.ll | 2 +- llvm/test/CodeGen/Thumb/cmp-add-fold.ll | 32 +++++++++++++++++++++++ llvm/test/CodeGen/Thumb2/lsr-deficiency.ll | 2 +- llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll | 2 +- 6 files changed, 81 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb/cmp-add-fold.ll diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 3d3a595..30586aa 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3126,6 +3126,48 @@ void ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->RemoveDeadNode(N); return; } + + case ARMISD::CMPZ: { + // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) + // This allows us to avoid materializing the expensive negative constant. + // The CMPZ #0 is useless and will be peepholed away but we need to keep it + // for its glue output. + SDValue X = N->getOperand(0); + auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); + if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { + int64_t Addend = -C->getSExtValue(); + + SDNode *Add = nullptr; + // In T2 mode, ADDS can be better than CMN if the immediate fits in a + // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. + // Outside that range we can just use a CMN which is 32-bit but has a + // 12-bit immediate range. 
+ if (Subtarget->isThumb2() && Addend < 1<<8) { + SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); + } else if (!Subtarget->isThumb2() && Addend < 1<<8) { + // FIXME: Add T1 tADDi8 code. + SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, + CurDAG->getTargetConstant(Addend, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; + Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops); + } else if (!Subtarget->isThumb2() && Addend < 1<<3) { + SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, + CurDAG->getTargetConstant(Addend, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; + Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops); + } + if (Add) { + SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; + CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); + } + } + // Other cases are autogenerated. + break; + } + case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); diff --git a/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll b/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll index ef98727..3c68cc7 100644 --- a/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll +++ b/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s -; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s +; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T +; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-A ; LSR should compare against the post-incremented induction variable. ; In this case, the immediate value is -2 which requires a cmn instruction. 
@@ -7,7 +7,8 @@ ; CHECK-LABEL: f: ; CHECK: %for.body ; CHECK: sub{{.*}}[[IV:r[0-9]+]], #2 -; CHECK: cmn{{.*}}[[IV]], #2 +; CHECK-T: adds{{.*}}[[IV]], #2 +; CHECK-A: cmn{{.*}}[[IV]], #2 ; CHECK: bne define i32 @f(i32* nocapture %a, i32 %i) nounwind readonly ssp { entry: diff --git a/llvm/test/CodeGen/ARM/select_xform.ll b/llvm/test/CodeGen/ARM/select_xform.ll index 460ca8f..8c1502e 100644 --- a/llvm/test/CodeGen/ARM/select_xform.ll +++ b/llvm/test/CodeGen/ARM/select_xform.ll @@ -280,7 +280,7 @@ entry: ; ARM: and r0, {{r[0-9]+}}, {{r[0-9]+}} ; T2-LABEL: t18: -; T2: and.w r0, {{r[0-9]+}} +; T2: and{{s|.w}} r0, {{r[0-9]+}} %cmp = icmp ne i32 %x, 0 %cond = select i1 %cmp, i32 5, i32 2 %cmp1 = icmp ne i32 %x, -1 diff --git a/llvm/test/CodeGen/Thumb/cmp-add-fold.ll b/llvm/test/CodeGen/Thumb/cmp-add-fold.ll new file mode 100644 index 0000000..b0ad8ab --- /dev/null +++ b/llvm/test/CodeGen/Thumb/cmp-add-fold.ll @@ -0,0 +1,32 @@ +; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=T1 %s +; RUN: llc -mtriple=thumbv7m-eabi -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=T2 %s + +; CHECK-LABEL: addri1: +; CHECK: adds r0, #3 +; T1-NEXT: b{{eq|ne}} +; T2-NOT: cmp +define i32 @addri1(i32 %a, i32 %b) { + %c = add i32 %a, 3 + %d = icmp eq i32 %c, 0 + br i1 %d, label %true, label %false + +true: + ret i32 4 +false: + ret i32 5 +} + +; CHECK-LABEL: addri2: +; CHECK: adds r0, #254 +; T1-NEXT: b{{eq|ne}} +; T2-NOT: cmp +define i32 @addri2(i32 %a, i32 %b) { + %c = add i32 %a, 254 + %d = icmp eq i32 %c, 0 + br i1 %d, label %true, label %false + +true: + ret i32 4 +false: + ret i32 5 +} diff --git a/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll b/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll index ccf7fae..bd1be6b 100644 --- a/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll +++ b/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll @@ -16,7 +16,7 @@ entry: bb: ; preds = %bb, %entry ; CHECK: LBB0_1: ; CHECK: subs [[R2:r[0-9]+]], 
#1 -; CHECK: cmp.w [[R2]], #-1 +; CHECK: adds {{.*}}, [[R2]], #1 ; CHECK: bne LBB0_1 %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; [#uses=1] diff --git a/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll b/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll index 42473c2..fbec7f3 100644 --- a/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll +++ b/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll @@ -3,7 +3,7 @@ ; -0x000000bb = 4294967109 define i1 @f1(i32 %a) { ; CHECK-LABEL: f1: -; CHECK: cmn.w {{r.*}}, #187 +; CHECK: adds {{r.*}}, #187 %tmp = icmp ne i32 %a, 4294967109 ret i1 %tmp } -- 2.7.4