From 43df29e20622dab89e9e8d4023baa2d215b6fd7e Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa"
Date: Sun, 1 Nov 2020 10:59:28 +0900
Subject: [PATCH] [VE] Optimize address calculation

Optimize address calculations using LEA/LEASL instructions. Update
comments in VEISelLowering.cpp also. Update an existing regression
test optimized by this modification.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D90878
---
 llvm/lib/Target/VE/VEISelLowering.cpp              | 18 ++--
 llvm/lib/Target/VE/VEInstrInfo.td                  | 98 ++++++++++------------
 .../CodeGen/VE/Scalar/pic_access_static_data.ll    |  3 +-
 3 files changed, 54 insertions(+), 65 deletions(-)

diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index c2d8d73..4299e8f 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -940,23 +940,19 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
     if (isa(Op) ||
         (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
       // Create following instructions for local linkage PIC code.
-      //     lea %s35, %gotoff_lo(.LCPI0_0)
-      //     and %s35, %s35, (32)0
-      //     lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
-      //     adds.l %s35, %s15, %s35 ; %s15 is GOT
-      // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
+      //     lea %reg, label@gotoff_lo
+      //     and %reg, %reg, (32)0
+      //     lea.sl %reg, label@gotoff_hi(%reg, %got)
       SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                                   VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
       SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
       return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
     }
     // Create following instructions for not local linkage PIC code.
-    //     lea %s35, %got_lo(.LCPI0_0)
-    //     and %s35, %s35, (32)0
-    //     lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
-    //     adds.l %s35, %s15, %s35 ; %s15 is GOT
-    //     ld %s35, (,%s35)
-    // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
+    //     lea %reg, label@got_lo
+    //     and %reg, %reg, (32)0
+    //     lea.sl %reg, label@got_hi(%reg)
+    //     ld %reg, (%reg, %got)
     SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
                                 VEMCExpr::VK_VE_GOT_LO32, DAG);
     SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index c4a4692..ba59f0c 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -944,23 +944,13 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1,
   let cx = 1 in defm LEASL : RMm<"lea.sl", 0x06, I64>;
 }
 
+// LEA basic patterns.
+//   Need to be defined here to prioritize LEA over ADX.
 def : Pat<(iPTR ADDRrri:$addr), (LEArri MEMrri:$addr)>;
 def : Pat<(iPTR ADDRrii:$addr), (LEArii MEMrii:$addr)>;
 def : Pat<(add I64:$base, simm32:$disp), (LEArii $base, 0, (LO32 $disp))>;
 def : Pat<(add I64:$base, lozero:$disp), (LEASLrii $base, 0, (HI32 $disp))>;
 
-def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
-                       [(add (add node:$base, node:$idx), node:$disp),
-                        (add (add node:$base, node:$disp), node:$idx)]>;
-def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
-          (LEArii $base, (LO7 $idx), (LO32 $disp))>;
-def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
-          (LEArri $base, $idx, (LO32 $disp))>;
-def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
-          (LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
-def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
-          (LEASLrri $base, $idx, (HI32 $disp))>;
-
 // Multiclass for load instructions.
 let mayLoad = 1, hasSideEffects = 0 in
 multiclass LOADm opc, RegisterClass RC, ValueType Ty,
@@ -1566,6 +1556,50 @@ def : Pat<(i64 imm:$val),
           (LEASLrii (ANDrm (LEAzii 0, 0, (LO32 imm:$val)), !add(32, 64)), 0,
                     (HI32 imm:$val))>;
 
+// LEA patterns
+def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
+                       [(add (add node:$base, node:$idx), node:$disp),
+                        (add (add node:$base, node:$disp), node:$idx),
+                        (add node:$base, (add $idx, $disp))]>;
+def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
+          (LEArii $base, (LO7 $idx), (LO32 $disp))>;
+def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
+          (LEArri $base, $idx, (LO32 $disp))>;
+def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
+          (LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
+def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
+          (LEASLrri $base, $idx, (HI32 $disp))>;
+
+// Address calculation patterns and optimizations
+//
+// Generate following instructions:
+//   1. LEA %reg, label@LO32
+//      AND %reg, %reg, (32)0
+//   2. LEASL %reg, label@HI32
+//   3. (LEA %reg, label@LO32)
+//      (AND %reg, %reg, (32)0)
+//      LEASL %reg, label@HI32(, %reg)
+//   4. (LEA %reg, label@LO32)
+//      (AND %reg, %reg, (32)0)
+//      LEASL %reg, label@HI32(%reg, %got)
+//
+def velo_only : OutPatFrag<(ops node:$lo),
+                           (ANDrm (LEAzii 0, 0, $lo), !add(32, 64))>;
+def vehi_only : OutPatFrag<(ops node:$hi),
+                           (LEASLzii 0, 0, $hi)>;
+def vehi_lo : OutPatFrag<(ops node:$hi, node:$lo),
+                         (LEASLrii $lo, 0, $hi)>;
+def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo),
+                             (LEASLrri $base, $lo, $hi)>;
+foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr",
+                 "tglobaltlsaddr" ] in {
+  def : Pat<(VElo !cast(type):$lo), (velo_only $lo)>;
+  def : Pat<(VEhi !cast(type):$hi), (vehi_only $hi)>;
+  def : Pat<(add (VEhi !cast(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>;
+  def : Pat<(add I64:$base, (add (VEhi !cast(type):$hi), I64:$lo)),
+            (vehi_baselo $base, $hi, $lo)>;
+}
+
 // floating point
 def : Pat<(f32 fpimm:$val),
           (EXTRACT_SUBREG (LEASLzii 0, 0, (HIFP32 $val)), sub_f32)>;
@@ -1813,46 +1847,6 @@ defm : TRATMSTm;
 defm : TRATMSTm;
 defm : TRATMSTm;
 
-// Address calculation and its optimization
-def : Pat<(VEhi tconstpool:$in), (LEASLzii 0, 0, tconstpool:$in)>;
-def : Pat<(VElo tconstpool:$in),
-          (ANDrm (LEAzii 0, 0, tconstpool:$in), !add(32, 64))>;
-def : Pat<(add (VEhi tconstpool:$in1), (VElo tconstpool:$in2)),
-          (LEASLrii (ANDrm (LEAzii 0, 0, tconstpool:$in2), !add(32, 64)), 0,
-                    (tconstpool:$in1))>;
-
-// Address calculation and its optimization
-def : Pat<(VEhi tglobaladdr:$in), (LEASLzii 0, 0, tglobaladdr:$in)>;
-def : Pat<(VElo tglobaladdr:$in),
-          (ANDrm (LEAzii 0, 0, tglobaladdr:$in), !add(32, 64))>;
-def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
-          (LEASLrii (ANDrm (LEAzii 0, 0, tglobaladdr:$in2), !add(32, 64)), 0,
-                    (tglobaladdr:$in1))>;
-
-// Address calculation and its optimization
-def : Pat<(VEhi tblockaddress:$in), (LEASLzii 0, 0, tblockaddress:$in)>;
-def : Pat<(VElo tblockaddress:$in),
-          (ANDrm (LEAzii 0, 0, tblockaddress:$in), !add(32, 64))>;
-def : Pat<(add (VEhi tblockaddress:$in1), (VElo tblockaddress:$in2)),
-          (LEASLrii (ANDrm (LEAzii 0, 0, tblockaddress:$in2), !add(32, 64)), 0,
-                    (tblockaddress:$in1))>;
-
-// GlobalTLS address calculation and its optimization
-def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzii 0, 0, tglobaltlsaddr:$in)>;
-def : Pat<(VElo tglobaltlsaddr:$in),
-          (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in), !add(32, 64))>;
-def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)),
-          (LEASLrii (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in2), !add(32, 64)), 0,
-                    (tglobaltlsaddr:$in1))>;
-
-// Address calculation and its optimization
-def : Pat<(VEhi texternalsym:$in), (LEASLzii 0, 0, texternalsym:$in)>;
-def : Pat<(VElo texternalsym:$in),
-          (ANDrm (LEAzii 0, 0, texternalsym:$in), !add(32, 64))>;
-def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)),
-          (LEASLrii (ANDrm (LEAzii 0, 0, texternalsym:$in2), !add(32, 64)), 0,
-                    (texternalsym:$in1))>;
-
 // Branches
 def : Pat<(br bb:$addr), (BRCFLa bb:$addr)>;
 
diff --git a/llvm/test/CodeGen/VE/Scalar/pic_access_static_data.ll b/llvm/test/CodeGen/VE/Scalar/pic_access_static_data.ll
index 62bb782..62afe5c 100644
--- a/llvm/test/CodeGen/VE/Scalar/pic_access_static_data.ll
+++ b/llvm/test/CodeGen/VE/Scalar/pic_access_static_data.ll
@@ -54,8 +54,7 @@ define i32 @main() {
 ; CHECK-NEXT:    st %s1, 184(, %s11)
 ; CHECK-NEXT:    lea %s0, .L.str@gotoff_lo
 ; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    lea.sl %s0, .L.str@gotoff_hi(, %s0)
-; CHECK-NEXT:    adds.l %s0, %s15, %s0
+; CHECK-NEXT:    lea.sl %s0, .L.str@gotoff_hi(%s0, %s15)
 ; CHECK-NEXT:    lea %s12, printf@plt_lo(-24)
 ; CHECK-NEXT:    and %s12, %s12, (32)0
 ; CHECK-NEXT:    sic %s16
-- 
2.7.4