From: Jonas Paulsson Date: Wed, 27 Feb 2019 00:18:28 +0000 (+0000) Subject: [SystemZ] Pass regalloc hints to help Load-and-Test transformations. X-Git-Tag: llvmorg-9.0.0-rc1~11190 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=129826cd9fb5a86ad1d92eeb1b933a2c82e2721f;p=platform%2Fupstream%2Fllvm.git [SystemZ] Pass regalloc hints to help Load-and-Test transformations. Since there is no "Load-and-Test-High" instruction, the 32 bit load of a register to be compared with 0 can only be implemented with LT if the virtual GRX32 register ends up in a low part (GR32 register). This patch detects these cases and passes the GR32 registers (low parts) as (soft) hints in getRegAllocationHints(). Review: Ulrich Weigand. llvm-svn: 354935 --- diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 0e9e7e33420b..4a347f641c40 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -53,6 +53,26 @@ static const TargetRegisterClass *getRC32(MachineOperand &MO, return RC; } +// Pass the registers of RC as hints while making sure that if any of these +// registers are copy hints (and therefore already in Hints), hint them +// first. +static void addHints(ArrayRef Order, + SmallVectorImpl &Hints, + const TargetRegisterClass *RC, + const MachineRegisterInfo *MRI) { + SmallSet CopyHints; + CopyHints.insert(Hints.begin(), Hints.end()); + Hints.clear(); + for (MCPhysReg Reg : Order) + if (CopyHints.count(Reg) && + RC->contains(Reg) && !MRI->isReserved(Reg)) + Hints.push_back(Reg); + for (MCPhysReg Reg : Order) + if (!CopyHints.count(Reg) && + RC->contains(Reg) && !MRI->isReserved(Reg)) + Hints.push_back(Reg); +} + bool SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef Order, @@ -75,7 +95,7 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, if (!DoneRegs.insert(Reg).second) continue; - for (auto &Use : MRI->use_instructions(Reg)) + for (auto &Use : MRI->use_instructions(Reg)) { // For LOCRMux, see if the other operand is already a high or low // register, and in that case give the correpsonding hints for // VirtReg. LOCR instructions need both operands in either high or @@ -87,19 +107,7 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI), getRC32(TrueMO, VRM, MRI)); if (RC && RC != &SystemZ::GRX32BitRegClass) { - // Pass the registers of RC as hints while making sure that if - // any of these registers are copy hints, hint them first. - SmallSet CopyHints; - CopyHints.insert(Hints.begin(), Hints.end()); - Hints.clear(); - for (MCPhysReg Reg : Order) - if (CopyHints.count(Reg) && - RC->contains(Reg) && !MRI->isReserved(Reg)) - Hints.push_back(Reg); - for (MCPhysReg Reg : Order) - if (!CopyHints.count(Reg) && - RC->contains(Reg) && !MRI->isReserved(Reg)) - Hints.push_back(Reg); + addHints(Order, Hints, RC, MRI); // Return true to make these hints the only regs available to // RA. This may mean extra spilling but since the alternative is // a jump sequence expansion of the LOCRMux, it is preferred. @@ -111,7 +119,22 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg()); if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass) Worklist.push_back(OtherReg); - } + } // end LOCRMux + else if (Use.getOpcode() == SystemZ::CHIMux || + Use.getOpcode() == SystemZ::CFIMux) { + if (Use.getOperand(1).getImm() == 0) { + bool OnlyLMuxes = true; + for (MachineInstr &DefMI : MRI->def_instructions(VirtReg)) + if (DefMI.getOpcode() != SystemZ::LMux) + OnlyLMuxes = false; + if (OnlyLMuxes) { + addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI); + // Return false to make these hints preferred but not obligatory. + return false; + } + } + } // end CHIMux / CFIMux + } } } diff --git a/llvm/test/CodeGen/SystemZ/load-and-test-RA-hints.mir b/llvm/test/CodeGen/SystemZ/load-and-test-RA-hints.mir new file mode 100644 index 000000000000..707e7c538fd5 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/load-and-test-RA-hints.mir @@ -0,0 +1,166 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -start-before=greedy %s -o - \ +# RUN: -debug-only=regalloc 2>&1 | FileCheck %s +# +# REQUIRES: asserts +# +# Test that regalloc hints are passed for compare with zero cases that can be +# converted to load-and-test. + +--- | + ; ModuleID = './tc.ll' + source_filename = "proof.c" + target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" + target triple = "s390x-ibm-linux" + + @rootlosers = external dso_local local_unnamed_addr global [300 x i32], align 4 + + define dso_local void @proofnumberscan() local_unnamed_addr #0 { + bb: + br i1 undef, label %bb20.preheader, label %bb1.preheader + + bb1.preheader: ; preds = %bb + br label %bb1 + + bb20.preheader: ; preds = %bb + br label %bb20 + + bb1: ; preds = %bb1.preheader, %bb15 + %lsr.iv3 = phi [512 x i32]* [ undef, %bb1.preheader ], [ %2, %bb15 ] + %lsr.iv1 = phi [300 x i32]* [ @rootlosers, %bb1.preheader ], [ %1, %bb15 ] + %lsr.iv = phi i32 [ 0, %bb1.preheader ], [ %lsr.iv.next, %bb15 ] + %tmp2 = phi i32 [ %tmp18, %bb15 ], [ 0, %bb1.preheader ] + %tmp3 = phi i32 [ %tmp17, %bb15 ], [ 100000000, %bb1.preheader ] + %lsr.iv35 = bitcast [512 x i32]* %lsr.iv3 to i32* + %tmp5 = load i32, i32* %lsr.iv35, align 4, !tbaa !1 + %tmp6 = load i32, i32* undef, align 4, !tbaa !1 + %tmp7 = icmp eq i32 %tmp6, 0 + br i1 %tmp7, label %bb15, label %bb8 + + bb8: ; preds = %bb1 + %0 = bitcast [300 x i32]* %lsr.iv1 to i32* + %tmp10 = load i32, i32* %0, align 4, !tbaa !1 + %tmp11 = icmp eq i32 %tmp10, 0 + %tmp12 = select i1 %tmp11, i32 %tmp5, i32 %tmp3 + %tmp14 = select i1 %tmp11, i32 %lsr.iv, i32 %tmp2 + br label %bb15 + + bb15: ; preds = %bb8, %bb1 + %tmp16 = phi i32 [ 0, %bb1 ], [ %tmp6, %bb8 ] + %tmp17 = phi i32 [ %tmp3, %bb1 ], [ %tmp12, %bb8 ] + %tmp18 = phi i32 [ %tmp2, %bb1 ], [ %tmp14, %bb8 ] + %lsr.iv.next = add i32 %lsr.iv, 4 + %scevgep = getelementptr [300 x i32], [300 x i32]* %lsr.iv1, i64 0, i64 4 + %1 = bitcast i32* %scevgep to [300 x i32]* + %scevgep4 = getelementptr [512 x i32], [512 x i32]* %lsr.iv3, i64 0, i64 4 + %2 = bitcast i32* %scevgep4 to [512 x i32]* + br label %bb1 + + bb20: ; preds = %bb20, %bb20.preheader + br label %bb20 + } + + attributes #0 = { "target-cpu"="z13" "use-soft-float"="false" } + + !llvm.ident = !{!0} + + !0 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git 29e2813a2ab7d5569860bb07892dfef7b5374d96) (http://llvm.org/git/llvm.git 546f779cb9d4ac2ce9c9b9522019f500abca9522)"} + !1 = !{!2, !2, i64 0} + !2 = !{!"int", !3, i64 0} + !3 = !{!"omnipotent char", !4, i64 0} + !4 = !{!"Simple C/C++ TBAA"} + +... + +# CHECK: ********** MACHINEINSTRS ********** +# CHECK: LMux +# CHECK: [[VREG0:%[0-9]+]]:grx32bit = LMux +# CHECK: CHIMux [[VREG0]]:grx32bit, 0, implicit-def $cc +# CHECK: [[VREG1:%[0-9]+]]:grx32bit = LMux +# CHECK: CHIMux [[VREG1]]:grx32bit, 0, implicit-def $cc +# CHECK: selectOrSplit GRX32Bit:[[VREG0]] +# CHECK-NEXT: hints: $r0l $r1l $r2l $r3l $r4l $r5l $r14l $r13l $r12l $r11l $r10l $r9l $r8l $r7l $r6l +# CHECK-NEXT: assigning [[VREG0]] to $[[PREG0:r[0-9]+]]l +# CHECK: selectOrSplit GRX32Bit:[[VREG1]] +# CHECK-NEXT: hints: $r0l $r1l $r2l $r3l $r4l $r5l $r14l $r13l $r12l $r11l $r10l $r9l $r8l $r7l $r6l +# CHECK-NEXT: assigning [[VREG1]] to $[[PREG1:r[0-9]+]]l +# CHECK: lt %[[PREG0]] +# CHECK: lt %[[PREG1]] + +--- +name: proofnumberscan +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: addr64bit } + - { id: 1, class: addr64bit } + - { id: 2, class: grx32bit } + - { id: 3, class: grx32bit } + - { id: 4, class: grx32bit } + - { id: 5, class: grx32bit } + - { id: 6, class: grx32bit } + - { id: 7, class: grx32bit } + - { id: 8, class: grx32bit } + - { id: 9, class: grx32bit } + - { id: 10, class: grx32bit } + - { id: 11, class: grx32bit } + - { id: 12, class: gr64bit } + - { id: 13, class: gr64bit } + - { id: 14, class: grx32bit } + - { id: 15, class: gr64bit } + - { id: 16, class: gr64bit } + - { id: 17, class: grx32bit } + - { id: 18, class: grx32bit } + - { id: 19, class: addr64bit } + - { id: 20, class: grx32bit } + - { id: 21, class: addr64bit } + - { id: 22, class: addr64bit } + - { id: 23, class: grx32bit } + - { id: 24, class: grx32bit } + - { id: 25, class: grx32bit } + - { id: 26, class: grx32bit } + - { id: 27, class: grx32bit } +body: | + bb.0.bb: + successors: %bb.1, %bb.2 + + %23:grx32bit = LHIMux 0 + CHIMux %23, 0, implicit-def $cc + BRC 14, 8, %bb.2, implicit killed $cc + + bb.1: + J %bb.6 + + bb.2.bb1.preheader: + %25:grx32bit = IIFMux 100000000 + %22:addr64bit = LARL @rootlosers + %21:addr64bit = IMPLICIT_DEF + %24:grx32bit = LHIMux 0 + J %bb.3 + + bb.3.bb1: + successors: %bb.7(0x30000000), %bb.4(0x50000000) + + %5:grx32bit = LMux %21, 0, $noreg :: (load 4 from %ir.lsr.iv35, !tbaa !1) + %6:grx32bit = LMux undef %19:addr64bit, 0, $noreg :: (load 4 from `i32* undef`, !tbaa !1) + CHIMux %6, 0, implicit-def $cc + BRC 14, 6, %bb.4, implicit killed $cc + + bb.7: + J %bb.5 + + bb.4.bb8: + %20:grx32bit = LMux %22, 0, $noreg :: (load 4 from %ir.0, !tbaa !1) + CHIMux %20, 0, implicit-def $cc + %25:grx32bit = LOCRMux %25, %5, 14, 8, implicit $cc + %24:grx32bit = LOCRMux %24, %23, 14, 8, implicit killed $cc + + bb.5.bb15: + %23:grx32bit = AHIMux %23, 4, implicit-def dead $cc + %22:addr64bit = LA %22, 16, $noreg + %21:addr64bit = LA %21, 16, $noreg + J %bb.3 + + bb.6.bb20: + J %bb.6 + +...