bool AArch64TargetLowering::shouldLocalize(
const MachineInstr &MI, const TargetTransformInfo *TTI) const {
- if (MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_GLOBAL_VALUE: {
// On Darwin, TLS global vars get selected into function calls, which
    // we don't want localized, as they can get moved into the middle of
// another call sequence.
const GlobalValue &GV = *MI.getOperand(1).getGlobal();
if (GV.isThreadLocal() && Subtarget->isTargetMachO())
return false;
+ break;
+ }
+  // If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as
+  // localizable.
+ case AArch64::ADRP:
+ case AArch64::G_ADD_LOW:
+ return true;
+ default:
+ break;
}
return TargetLoweringBase::shouldLocalize(MI, TTI);
}
--- /dev/null
+//=----- AArch64InstrGISel.td - AArch64 GISel target pseudos -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 GlobalISel target pseudo instruction definitions. This is kept
+// separate from the other tablegen files for organizational purposes, but
+// shares the same infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+class AArch64GenericInstruction : GenericInstruction {
+ let Namespace = "AArch64";
+}
+
+// A pseudo to represent a relocatable add instruction as part of address
+// computation.
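+// It is produced during legalization, paired with an ADRP that computes the
+// page address (see AArch64LegalizerInfo::legalizeSmallCMGlobalValue).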
+def G_ADD_LOW : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src, type2:$imm);
+ let hasSideEffects = 0;
+}
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
+
+include "AArch64InstrGISel.td"
return selectAddrModeUnscaled(Root, 16);
}
+  /// Helper to try to fold in a G_ADD_LOW into an immediate, to be used
+ /// from complex pattern matchers like selectAddrModeIndexed().
+ ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
+ MachineRegisterInfo &MRI) const;
+
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
unsigned Size) const;
template <int Width>
return contractCrossBankCopyIntoStore(I, MRI);
case TargetOpcode::G_PTR_ADD:
return convertPtrAddToAdd(I, MRI);
+ case TargetOpcode::G_LOAD: {
+ // For scalar loads of pointers, we try to convert the dest type from p0
+ // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
+ // conversion, this should be ok because all users should have been
+ // selected already, so the type doesn't matter for them.
+ Register DstReg = I.getOperand(0).getReg();
+ const LLT DstTy = MRI.getType(DstReg);
+ if (!DstTy.isPointer())
+ return false;
+ MRI.setType(DstReg, LLT::scalar(64));
+ return true;
+ }
default:
return false;
}
unsigned Opcode = I.getOpcode();
// G_PHI requires same handling as PHI
- if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
+ if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
// Certain non-generic instructions also need some special handling.
if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
case TargetOpcode::G_BRJT:
return selectBrJT(I, MRI);
+ case AArch64::G_ADD_LOW: {
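+    // G_ADD_LOW's operands (dst, src, :lo12: global) line up with ADDXri's;
+    // we only need to append the shift immediate of 0.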
+ I.setDesc(TII.get(AArch64::ADDXri));
+ I.addOperand(MachineOperand::CreateImm(0));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
case TargetOpcode::G_BSWAP: {
// Handle vector types for G_BSWAP directly.
Register DstReg = I.getOperand(0).getReg();
return None;
}
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
+ unsigned Size,
+ MachineRegisterInfo &MRI) const {
+ if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
+ return None;
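+  // Match the ADRP that feeds the G_ADD_LOW (both created during
+  // legalization) so the :lo12: part can fold into the load/store immediate.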
+ MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
+ if (Adrp.getOpcode() != AArch64::ADRP)
+ return None;
+
+ // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
+  // TODO: Need to check GV's offset % size if doing offset folding into
+  // globals.
+ assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
+ auto GV = Adrp.getOperand(1).getGlobal();
+ if (GV->isThreadLocal())
+ return None;
+
+ unsigned Alignment = GV->getAlignment();
+ Type *Ty = GV->getValueType();
+ auto &MF = *RootDef.getParent()->getParent();
+ if (Alignment == 0 && Ty->isSized())
+ Alignment = MF.getDataLayout().getABITypeAlignment(Ty);
+
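+  // The immediate in the load/store addressing mode is scaled by Size, so the
+  // :lo12: fixup is only encodable if the global is at least Size-aligned.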
+ if (Alignment < Size)
+ return None;
+
+ unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
+ MachineIRBuilder MIRBuilder(RootDef);
+ Register AdrpReg = Adrp.getOperand(0).getReg();
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addGlobalAddress(GV, /* Offset */ 0,
+ OpFlags | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ }}};
+}
+
/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
unsigned Size) const {
- MachineRegisterInfo &MRI =
- Root.getParent()->getParent()->getParent()->getRegInfo();
+ MachineFunction &MF = *Root.getParent()->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
if (!Root.isReg())
return None;
}};
}
+ CodeModel::Model CM = MF.getTarget().getCodeModel();
+  // In the small code model, check if we can fold the low part of an
+  // ADRP + G_ADD_LOW address computation into the immediate.
+ if (CM == CodeModel::Small) {
+ auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
+ if (OpFns)
+ return OpFns;
+ }
+
if (isBaseWithConstantOffset(Root, MRI)) {
MachineOperand &LHS = RootDef->getOperand(1);
MachineOperand &RHS = RootDef->getOperand(2);
using namespace LegalizeMutations;
using namespace LegalityPredicates;
-AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
+AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
+ : ST(&ST) {
using namespace TargetOpcode;
const LLT p0 = LLT::pointer(0, 64);
const LLT s1 = LLT::scalar(1);
const LLT v2s64 = LLT::vector(2, 64);
const LLT v2p0 = LLT::vector(2, p0);
+ const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
+
// FIXME: support subtargets which have neon/fp-armv8 disabled.
if (!ST.hasNEON() || !ST.hasFPARMv8()) {
computeTables();
// Pointer-handling
getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
- getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
+
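+  // In the small code model, custom-legalize G_GLOBAL_VALUE into
+  // ADRP + G_ADD_LOW (see legalizeSmallCMGlobalValue) so the add can later
+  // fold into load/store addressing modes.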
+ if (TM.getCodeModel() == CodeModel::Small)
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
+ else
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
getActionDefinitionsBuilder(G_PTRTOINT)
.legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
+ case TargetOpcode::G_GLOBAL_VALUE:
+ return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
}
llvm_unreachable("expected switch to return");
}
+bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
+ assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
+ // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
+ // G_ADD_LOW instructions.
+  // By splitting this here, we can optimize accesses in the small code model
+  // by folding the G_ADD_LOW into the load/store offset.
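+  // For example:
+  //   %g:_(p0) = G_GLOBAL_VALUE @var
+  // becomes:
+  //   %page:gpr64(p0) = ADRP target-flags(aarch64-page) @var
+  //   %g:_(p0) = G_ADD_LOW %page(p0), target-flags(aarch64-pageoff, aarch64-nc) @var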
+ auto GV = MI.getOperand(1).getGlobal();
+ if (GV->isThreadLocal())
+ return true; // Don't want to modify TLS vars.
+
+ MIRBuilder.setInstrAndDebugLoc(MI);
+ auto &TM = ST->getTargetLowering()->getTargetMachine();
+ unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
+
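+  // GOT accesses load the address from memory (ADRP + LDR) rather than
+  // computing it (ADRP + ADD), so leave the G_GLOBAL_VALUE as is.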
+ if (OpFlags & AArch64II::MO_GOT)
+ return true;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
+ .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
+  // ADRP is a target instruction, so give its result a register class as
+  // well as an LLT.
+ MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
+
+ MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
+ .addGlobalAddress(GV, 0,
+ OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64LegalizerInfo::legalizeIntrinsic(
MachineInstr &MI, MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const {
bool legalizeShlAshrLshr(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const;
+
+ bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const;
+ const AArch64Subtarget *ST;
};
} // End llvm namespace.
#endif
; CHECK-NEXT: .cfi_offset w26, -80
; CHECK-NEXT: .cfi_offset w27, -88
; CHECK-NEXT: .cfi_offset w28, -96
+; CHECK-NEXT: mov x27, x8
+; CHECK-NEXT: adrp x8, _asdf@PAGE
; CHECK-NEXT: mov w19, w0
-; CHECK-NEXT: Lloh0:
-; CHECK-NEXT: adrp x0, _asdf@PAGE
-; CHECK-NEXT: Lloh1:
-; CHECK-NEXT: add x0, x0, _asdf@PAGEOFF
+; CHECK-NEXT: add x0, x8, _asdf@PAGEOFF
; CHECK-NEXT: mov x20, x1
; CHECK-NEXT: mov x21, x2
; CHECK-NEXT: mov x22, x3
; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill
; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill
; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill
-; CHECK-NEXT: mov x27, x8
; CHECK-NEXT: bl _puts
; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload
; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #128] ; 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #224 ; =224
; CHECK-NEXT: b _musttail_variadic_callee
-; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1
call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))
%r = musttail call i32 (i32, ...) @musttail_variadic_callee(i32 %arg0, ...)
ret i32 %r
; CHECK-NEXT: ldr x9, [x0, #8]
; CHECK-NEXT: br x9
; CHECK-NEXT: LBB5_2: ; %else
-; CHECK-NEXT: Lloh2:
+; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x10, _g@GOTPAGE
; CHECK-NEXT: ldr x9, [x0, #16]
-; CHECK-NEXT: Lloh3:
+; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr x10, [x10, _g@GOTPAGEOFF]
; CHECK-NEXT: mov w11, #42
-; CHECK-NEXT: Lloh4:
+; CHECK-NEXT: Lloh2:
; CHECK-NEXT: str w11, [x10]
; CHECK-NEXT: br x9
-; CHECK-NEXT: .loh AdrpLdrGotStr Lloh2, Lloh3, Lloh4
+; CHECK-NEXT: .loh AdrpLdrGotStr Lloh0, Lloh1, Lloh2
%cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0
%cond = load i1, i1* %cond_p
br i1 %cond, label %then, label %else
body: |
bb.1.entry:
%0:_(s8) = G_IMPLICIT_DEF
- %4:_(p0) = G_GLOBAL_VALUE @.str, debug-location !DILocation(line: 0, scope: !4)
+ %4:_(p0) = COPY $x0
%10:_(p0) = G_IMPLICIT_DEF debug-location !DILocation(line: 0, scope: !4)
%1:_(s1) = G_TRUNC %0(s8)
%2:_(s32) = G_ZEXT %1(s1)
; CHECK: bb.0 (%ir-block.0):
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[BLOCK_ADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block)
- ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @addr
- ; CHECK: G_STORE [[BLOCK_ADDR]](p0), [[GV]](p0) :: (store 8 into @addr)
+ ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @addr
+ ; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @addr
+ ; CHECK: G_STORE [[BLOCK_ADDR]](p0), [[ADD_LOW]](p0) :: (store 8 into @addr)
; CHECK: G_BRINDIRECT [[BLOCK_ADDR]](p0)
; CHECK: bb.1.block (address-taken):
; CHECK: RET_ReallyLR
entry:
ret void
}
- @var = global i8 0
- define i8* @test_global() { ret i8* undef }
...
---
%3:_(s32) = G_ANYEXT %2
$w0 = COPY %3
...
-
----
-name: test_global
-registers:
- - { id: 0, class: _ }
-body: |
- bb.0:
-
- ; CHECK-LABEL: name: test_global
- ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
- ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
- ; CHECK: $x0 = COPY [[PTRTOINT]](s64)
- %0(p0) = G_GLOBAL_VALUE @var
- %1:_(s64) = G_PTRTOINT %0
- $x0 = COPY %1
-...
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -O0 -run-pass=legalizer --relocation-model=pic %s -o - | FileCheck %s --check-prefix=PIC
+# RUN: llc -O0 -run-pass=legalizer --code-model=large %s -o - | FileCheck %s --check-prefix=CMLARGE
+
+--- |
+ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64--"
+ @var = external global i8
+ define i8* @test_global() { ret i8* undef }
+...
+---
+name: test_global
+registers:
+ - { id: 0, class: _ }
+body: |
+ bb.0:
+
+ ; We don't want to lower to G_ADD_LOW when we need a GOT access, or when the code
+ ; model isn't 'Small'.
+
+ ; CHECK-LABEL: name: test_global
+ ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var
+ ; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var
+ ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[ADD_LOW]](p0)
+ ; CHECK: $x0 = COPY [[PTRTOINT]](s64)
+ ; PIC-LABEL: name: test_global
+ ; PIC: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
+ ; PIC: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
+ ; PIC: $x0 = COPY [[PTRTOINT]](s64)
+ ; CMLARGE-LABEL: name: test_global
+ ; CMLARGE: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
+ ; CMLARGE: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
+ ; CMLARGE: $x0 = COPY [[PTRTOINT]](s64)
+ %0(p0) = G_GLOBAL_VALUE @var
+ %1:_(s64) = G_PTRTOINT %0
+ $x0 = COPY %1
+...
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
# DEBUG-NEXT: G_GLOBAL_VALUE (opcode {{[0-9]+}}): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. the first uncovered type index: 1, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
# DEBUG-NEXT: G_EXTRACT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
if.end:
ret i32 0
}
+ define i32 @adrp_add() {
+ entry:
+ %0 = load i32, i32* @var1, align 4
+ %cmp = icmp eq i32 %0, 1
+ br i1 %cmp, label %if.then, label %if.end
+
+ if.then:
+ store i32 2, i32* @var2, align 4
+ store i32 3, i32* @var1, align 4
+ store i32 2, i32* @var3, align 4
+ store i32 3, i32* @var1, align 4
+ br label %if.end
+
+ if.end:
+ ret i32 0
+ }
define void @test_inttoptr() { ret void }
define void @many_local_use_intra_block() { ret void }
...
---
+name: adrp_add
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: adrp_add
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var1
+ ; CHECK: %addlow1:gpr(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var1
+ ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+ ; CHECK: [[ADRP1:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var2
+ ; CHECK: %addlow2:gpr(p0) = G_ADD_LOW [[ADRP1]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var2
+ ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+ ; CHECK: [[ADRP2:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var3
+ ; CHECK: %addlow3:gpr(p0) = G_ADD_LOW [[ADRP2]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var3
+ ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[ADRP]](p0) :: (load 4 from @var1)
+ ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+ ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]]
+ ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
+ ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1.if.then:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[ADRP3:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var2
+ ; CHECK: [[ADD_LOW:%[0-9]+]]:gpr(p0) = G_ADD_LOW [[ADRP3]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var2
+ ; CHECK: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+ ; CHECK: G_STORE [[C4]](s32), [[ADD_LOW]](p0) :: (store 4 into @var2)
+ ; CHECK: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+ ; CHECK: [[ADRP4:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var1
+ ; CHECK: [[ADD_LOW1:%[0-9]+]]:gpr(p0) = G_ADD_LOW [[ADRP4]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var1
+ ; CHECK: G_STORE [[C5]](s32), [[ADD_LOW1]](p0) :: (store 4 into @var1)
+ ; CHECK: [[ADRP5:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var3
+ ; CHECK: [[ADD_LOW2:%[0-9]+]]:gpr(p0) = G_ADD_LOW [[ADRP5]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var3
+ ; CHECK: G_STORE [[C4]](s32), [[ADD_LOW2]](p0) :: (store 4 into @var3)
+ ; CHECK: G_STORE [[C5]](s32), [[ADD_LOW1]](p0) :: (store 4 into @var1)
+ ; CHECK: bb.2.if.end:
+ ; CHECK: [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+ ; CHECK: $w0 = COPY [[C6]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+
+ ; Some of these instructions are dead.
+ bb.1.entry:
+ %1:gpr64(p0) = ADRP target-flags(aarch64-page) @var1
+ %addlow1:gpr(p0) = G_ADD_LOW %1(p0), target-flags(aarch64-pageoff, aarch64-nc) @var1
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %4:gpr(s32) = G_CONSTANT i32 2
+ %5:gpr64(p0) = ADRP target-flags(aarch64-page) @var2
+ %addlow2:gpr(p0) = G_ADD_LOW %5(p0), target-flags(aarch64-pageoff, aarch64-nc) @var2
+ %6:gpr(s32) = G_CONSTANT i32 3
+ %7:gpr64(p0) = ADRP target-flags(aarch64-page) @var3
+ %addlow3:gpr(p0) = G_ADD_LOW %7(p0), target-flags(aarch64-pageoff, aarch64-nc) @var3
+ %8:gpr(s32) = G_CONSTANT i32 0
+ %0:gpr(s32) = G_LOAD %1(p0) :: (load 4 from @var1)
+ %9:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2
+ %3:gpr(s1) = G_TRUNC %9(s32)
+ G_BRCOND %3(s1), %bb.2
+ G_BR %bb.3
+
+ bb.2.if.then:
+ G_STORE %4(s32), %addlow2(p0) :: (store 4 into @var2)
+ G_STORE %6(s32), %addlow1(p0) :: (store 4 into @var1)
+ G_STORE %4(s32), %addlow3(p0) :: (store 4 into @var3)
+ G_STORE %6(s32), %addlow1(p0) :: (store 4 into @var1)
+
+ bb.3.if.end:
+ $w0 = COPY %8(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
name: test_inttoptr
alignment: 4
legalized: true
; RUN: -mattr=+call-saved-x18 \
; RUN: -global-isel \
; RUN: -o - %s | FileCheck %s \
-; RUN: --check-prefix=CHECK-SAVED-ALL
+; RUN: --check-prefix=CHECK-SAVED-ALL-GISEL
; Used to exhaust the supply of GPRs.
@var = global [30 x i64] zeroinitializer
; CHECK-SAVED-ALL-DAG: ldr x15
; CHECK-SAVED-ALL-DAG: ldr x18
+; CHECK-SAVED-ALL-GISEL: adrp x16, var
+; CHECK-SAVED-ALL-GISEL-DAG: ldr x8
+; CHECK-SAVED-ALL-GISEL-DAG: ldr x9
+; CHECK-SAVED-ALL-GISEL-DAG: ldr x10
+; CHECK-SAVED-ALL-GISEL-DAG: ldr x11
+; CHECK-SAVED-ALL-GISEL-DAG: ldr x12
+; CHECK-SAVED-ALL-GISEL-DAG: ldr x13
+; CHECK-SAVED-ALL-GISEL-DAG: ldr x14
+; CHECK-SAVED-ALL-GISEL-DAG: ldr x15
+; CHECK-SAVED-ALL-GISEL-DAG: ldr x18
+
call void @callee()
; CHECK: bl callee
; CHECK-NOT: and
; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
-; FIXME: GlobalISel doesn't fold ands/adds into load/store addressing modes
-; right now/ So, we won't get the :lo12:var.
; GISEL-LABEL: test_load_i8:
; GISEL: ldxrb w[[LOADVAL:[0-9]+]], [x0]
; GISEL-NOT: uxtb
-; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}]
+; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
%val = call i64 @llvm.aarch64.ldxr.p0i8(i8* %addr)
%shortval = trunc i64 %val to i8
%extval = zext i8 %shortval to i64
; GISEL-LABEL: test_load_i16:
; GISEL: ldxrh w[[LOADVAL:[0-9]+]], [x0]
; GISEL-NOT: uxtb
-; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}]
+; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
%val = call i64 @llvm.aarch64.ldxr.p0i16(i16* %addr)
%shortval = trunc i64 %val to i16
%extval = zext i16 %shortval to i64
; GISEL-LABEL: test_load_i32:
; GISEL: ldxr w[[LOADVAL:[0-9]+]], [x0]
; GISEL-NOT: uxtb
-; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}]
+; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
%val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr)
%shortval = trunc i64 %val to i32
%extval = zext i32 %shortval to i64
; GISEL-LABEL: test_load_i64:
; GISEL: ldxr x[[LOADVAL:[0-9]+]], [x0]
; GISEL-NOT: uxtb
-; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}]
+; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
%val = call i64 @llvm.aarch64.ldxr.p0i64(i64* %addr)
store i64 %val, i64* @var, align 8
ret void
; CHECK-NOT: and
; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
-; FIXME: GlobalISel doesn't fold ands/adds into load/store addressing modes
-; right now/ So, we won't get the :lo12:var.
; GISEL-LABEL: test_load_acquire_i8:
; GISEL: ldaxrb w[[LOADVAL:[0-9]+]], [x0]
-; GISEL-DAG: str x[[LOADVAL]], [{{x[0-9]+}}]
+; GISEL-DAG: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
%val = call i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr)
%shortval = trunc i64 %val to i8
%extval = zext i8 %shortval to i64
; GISEL-LABEL: test_load_acquire_i16:
; GISEL: ldaxrh w[[LOADVAL:[0-9]+]], [x0]
-; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}]
+; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
%val = call i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr)
%shortval = trunc i64 %val to i16
%extval = zext i16 %shortval to i64
; GISEL-LABEL: test_load_acquire_i32:
; GISEL: ldaxr w[[LOADVAL:[0-9]+]], [x0]
-; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}]
+; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
%val = call i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr)
%shortval = trunc i64 %val to i32
%extval = zext i32 %shortval to i64
; GISEL-LABEL: test_load_acquire_i64:
; GISEL: ldaxr x[[LOADVAL:[0-9]+]], [x0]
-; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}]
+; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
%val = call i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr)
store i64 %val, i64* @var, align 8
ret void
; DAG-ISEL: ldr w0, [x8, ext]
; FAST-ISEL: add x8, x8, ext
; FAST-ISEL: ldr w0, [x8]
-; GLOBAL-ISEL-FALLBACK: add x8, x8, ext
-; GLOBAL-ISEL-FALLBACK: ldr w0, [x8]
+; GLOBAL-ISEL-FALLBACK: ldr w0, [x8, ext]
; CHECK: ret
define i32* @get_var_pointer() {