From 0827e2fa3fd15b49fd2d0fc676753f11abb60cab Mon Sep 17 00:00:00 2001
From: Momchil Velikov
Date: Thu, 20 Apr 2023 14:51:47 +0100
Subject: [PATCH] [AArch64] Fix incorrect `isLegalAddressingMode`

`AArch64TargetLowering::isLegalAddressingMode` has a number of defects:
it accepts an addressing mode that consists of only an immediate
operand, and it does not check the offset range for an addressing mode
of the form `1*ScaledReg + Offs`.

This patch fixes the above issues.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D143895

Change-Id: I756fa21941844ded44f082ac7eea4391219f9851
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp           |  70 +++----
 .../Target/AArch64/AArch64ISelLowering.cpp    |  25 ++-
 .../Transforms/Scalar/LoopStrengthReduce.cpp  |   8 +-
 .../AArch64/lsr-pre-inc-offset-check.ll       |   6 +-
 .../Target/AArch64/AddressingModes.cpp        | 174 ++++++++++++++++++
 llvm/unittests/Target/AArch64/CMakeLists.txt  |   1 +
 6 files changed, 243 insertions(+), 41 deletions(-)
 create mode 100644 llvm/unittests/Target/AArch64/AddressingModes.cpp

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 55ba5454f15e..95cad24c9e92 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4629,7 +4629,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
     return false;
   }
   case Instruction::Add: {
-    // Check to see if we can merge in the RHS then the LHS. If so, we win.
+    // Check to see if we can merge in one operand, then the other. If so, we
+    // win.
     ExtAddrMode BackupAddrMode = AddrMode;
     unsigned OldSize = AddrModeInsts.size();
     // Start a transaction at this point.
@@ -4639,9 +4640,15 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
         TPT.getRestorationPoint();
 
+    // Try to match an integer constant second to increase its chance of ending
+    // up in `BaseOffs` and decrease its chance of ending up in `BaseReg`.
+    int First = 0, Second = 1;
+    if (isa<ConstantInt>(AddrInst->getOperand(First)) &&
+        !isa<ConstantInt>(AddrInst->getOperand(Second)))
+      std::swap(First, Second);
     AddrMode.InBounds = false;
-    if (matchAddr(AddrInst->getOperand(1), Depth + 1) &&
-        matchAddr(AddrInst->getOperand(0), Depth + 1))
+    if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
+        matchAddr(AddrInst->getOperand(Second), Depth + 1))
       return true;
 
     // Restore the old addr mode info.
     AddrMode = BackupAddrMode;
     AddrModeInsts.resize(OldSize);
     TPT.rollback(LastKnownGood);
 
-    // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
-    if (matchAddr(AddrInst->getOperand(0), Depth + 1) &&
-        matchAddr(AddrInst->getOperand(1), Depth + 1))
+    // Otherwise this was over-aggressive. Try merging operands in the opposite
+    // order.
+    if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
+        matchAddr(AddrInst->getOperand(First), Depth + 1))
       return true;
 
     // Otherwise we definitely can't merge the ADD in.
@@ -4720,36 +4728,35 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
       // just add it to the disp field and check validity.
       if (VariableOperand == -1) {
         AddrMode.BaseOffs += ConstantOffset;
-        if (ConstantOffset == 0 ||
-            TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
-          // Check to see if we can fold the base pointer in too.
-          if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
+        if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
           if (!cast<GEPOperator>(AddrInst)->isInBounds())
             AddrMode.InBounds = false;
           return true;
-          }
-        } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
-                   TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
-                   ConstantOffset > 0) {
-          // Record GEPs with non-zero offsets as candidates for splitting in the
-          // event that the offset cannot fit into the r+i addressing mode.
-          // Simple and common case that only one GEP is used in calculating the
-          // address for the memory access.
-          Value *Base = AddrInst->getOperand(0);
-          auto *BaseI = dyn_cast<Instruction>(Base);
-          auto *GEP = cast<GetElementPtrInst>(AddrInst);
-          if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
-              (BaseI && !isa<CastInst>(BaseI) &&
-               !isa<GetElementPtrInst>(BaseI))) {
-            // Make sure the parent block allows inserting non-PHI instructions
-            // before the terminator.
-            BasicBlock *Parent =
-                BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
-            if (!Parent->getTerminator()->isEHPad())
-              LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
-          }
         }
         AddrMode.BaseOffs -= ConstantOffset;
+
+        if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
+            TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
+            ConstantOffset > 0) {
+          // Record GEPs with non-zero offsets as candidates for splitting in
+          // the event that the offset cannot fit into the r+i addressing mode.
+          // Simple and common case that only one GEP is used in calculating the
+          // address for the memory access.
+          Value *Base = AddrInst->getOperand(0);
+          auto *BaseI = dyn_cast<Instruction>(Base);
+          auto *GEP = cast<GetElementPtrInst>(AddrInst);
+          if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
+              (BaseI && !isa<CastInst>(BaseI) &&
+               !isa<GetElementPtrInst>(BaseI))) {
+            // Make sure the parent block allows inserting non-PHI instructions
+            // before the terminator.
+            BasicBlock *Parent = BaseI ? BaseI->getParent()
+                                       : &GEP->getFunction()->getEntryBlock();
+            if (!Parent->getTerminator()->isEHPad())
+              LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
+          }
+        }
+
         return false;
       }
@@ -6034,6 +6041,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
       int64_t Offset = LargeOffsetGEP->second;
       if (Offset != BaseOffset) {
         TargetLowering::AddrMode AddrMode;
+        AddrMode.HasBaseReg = true;
         AddrMode.BaseOffs = Offset - BaseOffset;
         // The result type of the GEP might not be the type of the memory
         // access.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3939f4de416b..fa7fc4ceb284 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14884,7 +14884,7 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
 /// isLegalAddressingMode - Return true if the addressing mode represented
 /// by AM is legal for this target, for a load/store of the specified type.
 bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
-                                                  const AddrMode &AM, Type *Ty,
+                                                  const AddrMode &AMode, Type *Ty,
                                                   unsigned AS, Instruction *I) const {
   // AArch64 has five basic addressing modes:
   //  reg
@@ -14894,11 +14894,30 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
   //  reg + SIZE_IN_BYTES * reg
 
   // No global is ever allowed as a base.
-  if (AM.BaseGV)
+  if (AMode.BaseGV)
     return false;
 
   // No reg+reg+imm addressing.
-  if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
+  if (AMode.HasBaseReg && AMode.BaseOffs && AMode.Scale)
+    return false;
+
+  // Canonicalise `1*ScaledReg + imm` into `BaseReg + imm` and
+  // `2*ScaledReg` into `BaseReg + ScaledReg`.
+  AddrMode AM = AMode;
+  if (AM.Scale && !AM.HasBaseReg) {
+    if (AM.Scale == 1) {
+      AM.HasBaseReg = true;
+      AM.Scale = 0;
+    } else if (AM.Scale == 2) {
+      AM.HasBaseReg = true;
+      AM.Scale = 1;
+    } else {
+      return false;
+    }
+  }
+
+  // A base register is required in all addressing modes.
+  if (!AM.HasBaseReg)
     return false;
 
   // FIXME: Update this method to support scalable addressing modes.
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4083d10df7ee..822b74d9ee2d 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2509,13 +2509,13 @@ LSRInstance::OptimizeLoopTermCond() {
           int64_t Scale = C->getSExtValue();
           if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                         /*BaseOffset=*/0,
-                                        /*HasBaseReg=*/false, Scale,
+                                        /*HasBaseReg=*/true, Scale,
                                         AccessTy.AddrSpace))
             goto decline_post_inc;
           Scale = -Scale;
           if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                         /*BaseOffset=*/0,
-                                        /*HasBaseReg=*/false, Scale,
+                                        /*HasBaseReg=*/true, Scale,
                                         AccessTy.AddrSpace))
             goto decline_post_inc;
         }
@@ -5002,11 +5002,11 @@ static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
   return TTI.isLegalAddressingMode(
              AccessType.MemTy, /*BaseGV=*/nullptr,
              /*BaseOffset=*/Diff->getAPInt().getSExtValue(),
-             /*HasBaseReg=*/false, /*Scale=*/0, AccessType.AddrSpace) &&
+             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) &&
          !TTI.isLegalAddressingMode(
              AccessType.MemTy, /*BaseGV=*/nullptr,
              /*BaseOffset=*/-Diff->getAPInt().getSExtValue(),
-             /*HasBaseReg=*/false, /*Scale=*/0, AccessType.AddrSpace);
+             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace);
 }
 
 /// Pick a register which seems likely to be profitable, and then in any use
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-pre-inc-offset-check.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-pre-inc-offset-check.ll
index bf9aec96ef9b..eb235ca82c9a 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-pre-inc-offset-check.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-pre-inc-offset-check.ll
@@ -20,13 +20,13 @@ define void @test_lsr_pre_inc_offset_check(ptr %p) {
 ; CHECK-LABEL: test_lsr_pre_inc_offset_check:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, #165
-; CHECK-NEXT:    add x9, x0, #340
+; CHECK-NEXT:    add x9, x0, #339
 ; CHECK-NEXT:    mov w10, #2
 ; CHECK-NEXT:  .LBB0_1: // %main
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    stur wzr, [x9, #-1]
+; CHECK-NEXT:    str wzr, [x9]
 ; CHECK-NEXT:    subs x8, x8, #1
-; CHECK-NEXT:    strb w10, [x9]
+; CHECK-NEXT:    strb w10, [x9, #1]
 ; CHECK-NEXT:    add x9, x9, #338
 ; CHECK-NEXT:    b.ne .LBB0_1
 ; CHECK-NEXT:  // %bb.2: // %exit
diff --git a/llvm/unittests/Target/AArch64/AddressingModes.cpp b/llvm/unittests/Target/AArch64/AddressingModes.cpp
new file mode 100644
index 000000000000..e2560aa363b1
--- /dev/null
+++ b/llvm/unittests/Target/AArch64/AddressingModes.cpp
@@ -0,0 +1,174 @@
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+#include <initializer_list>
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+
+struct TestCase {
+  TargetLowering::AddrMode AM;
+  unsigned TypeBits;
+  bool Result;
+};
+
+const std::initializer_list<TestCase> Tests = {
+    // {BaseGV, BaseOffs, HasBaseReg, Scale}, Bits, Result
+    {{reinterpret_cast<GlobalValue *>(-1), 0, false}, 64, false},
+    {{nullptr, 8, true, 1}, 64, false},
+    {{nullptr, 0, false, 2}, 64, true},
+    {{nullptr, 0, false, 1}, 64, true},
+    {{nullptr, 4, false, 0}, 64, false},
+
+    {{nullptr, 0, true, 1}, 64, true},
+    {{nullptr, 0, true, 1}, 32, true},
+    {{nullptr, 0, true, 1}, 16, true},
+    {{nullptr, 0, true, 1}, 8, true},
+
+    {{nullptr, 0, true, 2}, 64, false},
+    {{nullptr, 0, true, 2}, 32, false},
+    {{nullptr, 0, true, 2}, 16, true},
+    {{nullptr, 0, true, 2}, 8, false},
+    {{nullptr, 0, true, 4}, 64, false},
+    {{nullptr, 0, true, 4}, 32, true},
+    {{nullptr, 0, true, 4}, 16, false},
+    {{nullptr, 0, true, 4}, 8, false},
+
+    {{nullptr, 0, true, 8}, 64, true},
+    {{nullptr, 0, true, 8}, 32, false},
+    {{nullptr, 0, true, 8}, 16, false},
+    {{nullptr, 0, true, 8}, 8, false},
+
+    {{nullptr, 0, true, 16}, 64, false},
+    {{nullptr, 0, true, 16}, 32, false},
+    {{nullptr, 0, true, 16}, 16, false},
+    {{nullptr, 0, true, 16}, 8, false},
+
+    {{nullptr, -257, true, 0}, 64, false},
+    {{nullptr, -256, true, 0}, 64, true},
+    {{nullptr, -255, true, 0}, 64, true},
+    {{nullptr, -1, true, 0}, 64, true},
+    {{nullptr, 0, true, 0}, 64, true},
+    {{nullptr, 1, true, 0}, 64, true},
+    {{nullptr, 254, true, 0}, 64, true},
+    {{nullptr, 255, true, 0}, 64, true},
+    {{nullptr, 256, true, 0}, 64, true},
+    {{nullptr, 257, true, 0}, 64, false},
+    {{nullptr, 258, true, 0}, 64, false},
+    {{nullptr, 259, true, 0}, 64, false},
+    {{nullptr, 260, true, 0}, 64, false},
+    {{nullptr, 261, true, 0}, 64, false},
+    {{nullptr, 262, true, 0}, 64, false},
+    {{nullptr, 263, true, 0}, 64, false},
+    {{nullptr, 264, true, 0}, 64, true},
+
+    {{nullptr, 4096 * 8 - 8, true, 0}, 64, true},
+    {{nullptr, 4096 * 8 - 7, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 - 6, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 - 5, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 - 4, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 - 3, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 - 2, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 - 1, true, 0}, 64, false},
+    {{nullptr, 4096 * 8, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 + 1, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 + 2, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 + 3, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 + 4, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 + 5, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 + 6, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 + 7, true, 0}, 64, false},
+    {{nullptr, 4096 * 8 + 8, true, 0}, 64, false},
+
+    {{nullptr, -257, true, 0}, 32, false},
+    {{nullptr, -256, true, 0}, 32, true},
+    {{nullptr, -255, true, 0}, 32, true},
+    {{nullptr, -1, true, 0}, 32, true},
+    {{nullptr, 0, true, 0}, 32, true},
+    {{nullptr, 1, true, 0}, 32, true},
+    {{nullptr, 254, true, 0}, 32, true},
+    {{nullptr, 255, true, 0}, 32, true},
+    {{nullptr, 256, true, 0}, 32, true},
+    {{nullptr, 257, true, 0}, 32, false},
+    {{nullptr, 258, true, 0}, 32, false},
+    {{nullptr, 259, true, 0}, 32, false},
+    {{nullptr, 260, true, 0}, 32, true},
+
+    {{nullptr, 4096 * 4 - 4, true, 0}, 32, true},
+    {{nullptr, 4096 * 4 - 3, true, 0}, 32, false},
+    {{nullptr, 4096 * 4 - 2, true, 0}, 32, false},
+    {{nullptr, 4096 * 4 - 1, true, 0}, 32, false},
+    {{nullptr, 4096 * 4, true, 0}, 32, false},
+    {{nullptr, 4096 * 4 + 1, true, 0}, 32, false},
+    {{nullptr, 4096 * 4 + 2, true, 0}, 32, false},
+    {{nullptr, 4096 * 4 + 3, true, 0}, 32, false},
+    {{nullptr, 4096 * 4 + 4, true, 0}, 32, false},
+
+    {{nullptr, -257, true, 0}, 16, false},
+    {{nullptr, -256, true, 0}, 16, true},
+    {{nullptr, -255, true, 0}, 16, true},
+    {{nullptr, -1, true, 0}, 16, true},
+    {{nullptr, 0, true, 0}, 16, true},
+    {{nullptr, 1, true, 0}, 16, true},
+    {{nullptr, 254, true, 0}, 16, true},
+    {{nullptr, 255, true, 0}, 16, true},
+    {{nullptr, 256, true, 0}, 16, true},
+    {{nullptr, 257, true, 0}, 16, false},
+    {{nullptr, 258, true, 0}, 16, true},
+
+    {{nullptr, 4096 * 2 - 2, true, 0}, 16, true},
+    {{nullptr, 4096 * 2 - 1, true, 0}, 16, false},
+    {{nullptr, 4096 * 2, true, 0}, 16, false},
+    {{nullptr, 4096 * 2 + 1, true, 0}, 16, false},
+    {{nullptr, 4096 * 2 + 2, true, 0}, 16, false},
+
+    {{nullptr, -257, true, 0}, 8, false},
+    {{nullptr, -256, true, 0}, 8, true},
+    {{nullptr, -255, true, 0}, 8, true},
+    {{nullptr, -1, true, 0}, 8, true},
+    {{nullptr, 0, true, 0}, 8, true},
+    {{nullptr, 1, true, 0}, 8, true},
+    {{nullptr, 254, true, 0}, 8, true},
+    {{nullptr, 255, true, 0}, 8, true},
+    {{nullptr, 256, true, 0}, 8, true},
+    {{nullptr, 257, true, 0}, 8, true},
+
+    {{nullptr, 4096 - 2, true, 0}, 8, true},
+    {{nullptr, 4096 - 1, true, 0}, 8, true},
+    {{nullptr, 4096, true, 0}, 8, false},
+    {{nullptr, 4096 + 1, true, 0}, 8, false},
+
+};
+} // namespace
+
+TEST(AddressingModes, AddressingModes) {
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64Target();
+  LLVMInitializeAArch64TargetMC();
+
+  std::string Error;
+  auto TT = Triple::normalize("aarch64");
+  const Target *T = TargetRegistry::lookupTarget(TT, Error);
+
+  std::unique_ptr<TargetMachine> TM(
+      T->createTargetMachine(TT, "generic", "", TargetOptions(), std::nullopt,
+                             std::nullopt, CodeGenOpt::Default));
+  AArch64Subtarget ST(TM->getTargetTriple(), TM->getTargetCPU(),
+                      TM->getTargetCPU(), TM->getTargetFeatureString(), *TM,
+                      true);
+
+  auto *TLI = ST.getTargetLowering();
+  DataLayout DL("e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128");
+  LLVMContext Ctx;
+
+  for (const auto &Test : Tests) {
+    Type *Typ = Type::getIntNTy(Ctx, Test.TypeBits);
+    ASSERT_EQ(TLI->isLegalAddressingMode(DL, Test.AM, Typ, 0), Test.Result);
+  }
+}
diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
index 4a719aa37c4d..ca5e9b3251d3 100644
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -22,6 +22,7 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_target_unittest(AArch64Tests
   AArch64InstPrinterTest.cpp
+  AddressingModes.cpp
   DecomposeStackOffsetTest.cpp
   InstSizes.cpp
   MatrixRegisterAliasing.cpp
-- 
2.34.1
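
P.S. The base-register canonicalisation is easier to see in isolation. Below
is a minimal standalone sketch, hypothetical code rather than part of the
patch: `AddrMode` here is a plain stand-in for `TargetLowering::AddrMode`,
the name `canonicalise` is invented, and the `BaseGV` and per-type
offset-range checks of the real `isLegalAddressingMode` are omitted.

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for TargetLowering::AddrMode (BaseGV omitted).
struct AddrMode {
  int64_t BaseOffs = 0; // constant offset
  bool HasBaseReg = false;
  int64_t Scale = 0;    // 0 means no scaled index register
};

// Mirrors only the base-register handling the patch adds: returns false for
// shapes AArch64 cannot encode, otherwise rewrites AM into the canonical
// form that the remaining (omitted) offset checks expect.
bool canonicalise(AddrMode &AM) {
  // No reg+reg+imm addressing.
  if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
    return false;

  // `1*ScaledReg + imm` becomes `BaseReg + imm`, and `2*ScaledReg` becomes
  // `BaseReg + ScaledReg`; any other scale without a base register is
  // rejected outright.
  if (AM.Scale && !AM.HasBaseReg) {
    if (AM.Scale == 1) {
      AM.HasBaseReg = true;
      AM.Scale = 0;
    } else if (AM.Scale == 2) {
      AM.HasBaseReg = true;
      AM.Scale = 1;
    } else {
      return false;
    }
  }

  // Every AArch64 addressing mode needs a base register, so a mode that is
  // only an immediate (the first defect in the commit message) is rejected.
  return AM.HasBaseReg;
}

int main() {
  // Immediate-only mode: accepted before the patch, now illegal.
  AddrMode ImmOnly{/*BaseOffs=*/4, /*HasBaseReg=*/false, /*Scale=*/0};
  assert(!canonicalise(ImmOnly));

  // `1*ScaledReg + 8` canonicalises to `BaseReg + 8`; the real code then
  // checks that the offset fits the encoding for the access type.
  AddrMode ScaledPlusImm{/*BaseOffs=*/8, /*HasBaseReg=*/false, /*Scale=*/1};
  assert(canonicalise(ScaledPlusImm));
  assert(ScaledPlusImm.HasBaseReg && ScaledPlusImm.Scale == 0);
  return 0;
}

Once a mode is in this canonical form, only `BaseOffs` remains to be checked
against the scaled 12-bit unsigned and 9-bit signed immediate encodings,
which is what the offset tables in the unit test above exercise.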