namespace llvm {
+class AssumptionCache;
class CCState;
class CCValAssign;
class Constant;
return MachineMemOperand::MONone;
}
- MachineMemOperand::Flags getLoadMemOperandFlags(const LoadInst &LI,
- const DataLayout &DL) const;
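+  /// Returns the MachineMemOperand flags for the given load. \p AC and
+  /// \p LibInfo are optional; when provided they are forwarded to the
+  /// dereferenceability-and-alignment analysis.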
+ MachineMemOperand::Flags
+ getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL,
+ AssumptionCache *AC = nullptr,
+ const TargetLibraryInfo *LibInfo = nullptr) const;
MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI,
const DataLayout &DL) const;
MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI,
}
auto &TLI = *MF->getSubtarget().getTargetLowering();
- MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);
+ MachineMemOperand::Flags Flags =
+ TLI.getLoadMemOperandFlags(LI, *DL, AC, LibInfo);
if (AA && !(Flags & MachineMemOperand::MOInvariant)) {
if (AA->pointsToConstantMemory(
MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) {
}
}
- if (!(Flags & MachineMemOperand::MODereferenceable)) {
- if (isDereferenceableAndAlignedPointer(Ptr, LI.getType(), LI.getAlign(),
- *DL, &LI, AC, nullptr, LibInfo))
- Flags |= MachineMemOperand::MODereferenceable;
- }
-
const MDNode *Ranges =
Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
for (unsigned i = 0; i < Regs.size(); ++i) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
bool isVolatile = I.isVolatile();
MachineMemOperand::Flags MMOFlags =
- TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+ TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
SDValue Root;
bool ConstantMemory = false;
Root = DAG.getRoot();
}
- if (isDereferenceableAndAlignedPointer(SV, Ty, Alignment, DAG.getDataLayout(),
- &I, AC, nullptr, LibInfo))
- MMOFlags |= MachineMemOperand::MODereferenceable;
-
SDLoc dl = getCurSDLoc();
if (isVolatile)
I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
- auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+ auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
MF.getRegInfo().freezeReservedRegs(MF);
}
-MachineMemOperand::Flags
-TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI,
- const DataLayout &DL) const {
+MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags(
+ const LoadInst &LI, const DataLayout &DL, AssumptionCache *AC,
+ const TargetLibraryInfo *LibInfo) const {
MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad;
if (LI.isVolatile())
Flags |= MachineMemOperand::MOVolatile;
if (LI.hasMetadata(LLVMContext::MD_invariant_load))
Flags |= MachineMemOperand::MOInvariant;
- if (isDereferenceablePointer(LI.getPointerOperand(), LI.getType(), DL))
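+  // A load is only marked dereferenceable if its pointer is also provably
+  // sufficiently aligned; AC and LibInfo (either may be null) strengthen
+  // that analysis when available.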
+ if (isDereferenceableAndAlignedPointer(LI.getPointerOperand(), LI.getType(),
+ LI.getAlign(), DL, &LI, AC,
+ /*DT=*/nullptr, LibInfo))
Flags |= MachineMemOperand::MODereferenceable;
Flags |= getTargetMMOFlags(LI);
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable load (s32) from %ir.ptr)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.ptr)
; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%load = load i32, ptr %ptr, align 4
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable invariant load (s32) from %ir.ptr)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (invariant load (s32) from %ir.ptr)
; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%load = load i32, ptr %ptr, align 4, !invariant.load !0
; CHECK-LABEL: name: stack_passed_i64
; CHECK: fixedStack:
-; CHECK: - { id: 0, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
+; CHECK: - { id: 0, type: default, offset: 16, size: 8, alignment: 16, stack-id: default,
; CHECK-NEXT: isImmutable: false, isAliased: false,
-; CHECK: - { id: 1, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+; CHECK: - { id: 1, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
+; CHECK-NEXT: isImmutable: false, isAliased: false,
+; CHECK: - { id: 2, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
; CHECK-NEXT: isImmutable: true, isAliased: false,
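+; The load from %arg9 (byval with no explicit alignment) is not marked
+; dereferenceable, while the load from %arg10 (byval align(8)) is.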
define void @stack_passed_i64(i64 %arg, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6,
- i64 %arg7, i64 %arg8, ptr byval(i64) %arg9) {
+ i64 %arg7, i64 %arg8, ptr byval(i64) %arg9, ptr byval(i64) align(8) %arg10) {
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
- ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16)
- ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
; CHECK: [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX1]](p0)
- ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY8]](p0) :: (dereferenceable load (s64) from %ir.arg9)
- ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD1]], [[LOAD]]
- ; CHECK: G_STORE [[ADD]](s64), [[COPY8]](p0) :: (volatile store (s64) into %ir.arg9)
- ; CHECK: RET_ReallyLR
- %load = load i64, ptr %arg9
- %add = add i64 %load, %arg8
- store volatile i64 %add, ptr %arg9
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[COPY9:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX2]](p0)
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY8]](p0) :: (load (s64) from %ir.arg9)
+ ; CHECK: [[ADD0:%[0-9]+]]:_(s64) = G_ADD [[LOAD1]], [[LOAD]]
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[COPY9]](p0) :: (dereferenceable load (s64) from %ir.arg10)
+ ; CHECK: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ADD0]], [[LOAD2]]
+ ; CHECK: G_STORE [[ADD1]](s64), [[COPY8]](p0) :: (volatile store (s64) into %ir.arg9)
 + ; CHECK: RET_ReallyLR
+ %load0 = load i64, ptr %arg9
+ %add0 = add i64 %load0, %arg8
+ %load1 = load i64, ptr %arg10
+ %add1 = add i64 %add0, %load1
+ store volatile i64 %add1, ptr %arg9
ret void
}
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (load (s8) from %ir.arg0, align 4, addrspace 5)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from %ir.arg0 + 4, addrspace 5)
; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile load (s8) from %ir.arg0, align 4, addrspace 5)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
- ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile load (s32) from %ir.arg0 + 4, addrspace 5)
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile load (s8) from %ir.arg1, align 4, addrspace 5)
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32)
- ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5)
+ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile load (s32) from %ir.arg1 + 4, addrspace 5)
; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32) from %ir.arg0, addrspace 5)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (load (s64) from %ir.arg1, addrspace 5)
; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: SI_RETURN
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
-define amdgpu_cs void @mmo_offsets0(ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615) %arg0, i32 %arg1) {
+define amdgpu_cs void @mmo_offsets0(ptr addrspace(6) inreg noalias align(16) dereferenceable(18446744073709551615) %arg0, i32 %arg1) {
; GCN-LABEL: name: mmo_offsets0
; GCN: bb.0.bb.0:
; GCN-NEXT: liveins: $sgpr0, $vgpr0
; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-NEXT: s_add_u32 s8, s8, 8
; GISEL-NEXT: s_addc_u32 s9, s9, 0
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
; GISEL-NEXT: s_mov_b32 s14, s16
; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GISEL-NEXT: s_add_u32 s8, s8, 8
; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT: s_add_u32 s8, s8, 8
; GISEL-NEXT: s_addc_u32 s9, s9, 0
-; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
-; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
+; GISEL-NEXT: v_or_b32_e32 v31, v0, v2
; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GISEL-NEXT: s_mov_b32 s14, s16
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
ret void
}
-define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) %in.byref, i32 %after.offset) {
+define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) {
; SI-LABEL: byref_natural_align_constant_v16i32_arg:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x19
; Make sure legalizing the f16 load preserves both the invariant
; and dereferenceable flags.
; GCN: BUFFER_LOAD_USHORT{{.*}} :: (dereferenceable invariant load (s16) from %ir.ptr, addrspace 4)
-define half @legalize_f16_load(ptr addrspace(4) dereferenceable(4) %ptr) {
+define half @legalize_f16_load_align2(ptr addrspace(4) dereferenceable(4) align(2) %ptr) {
+ %load = load half, ptr addrspace(4) %ptr, !invariant.load !0
+ %add = fadd half %load, 1.0
+ ret half %add
+}
+
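+; With only align(1), alignment cannot be proven for the f16 load, so the
+; MMO keeps the invariant flag but is not marked dereferenceable.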
+; GCN: BUFFER_LOAD_USHORT{{.*}} :: (invariant load (s16) from %ir.ptr, addrspace 4)
+define half @legalize_f16_load_align1(ptr addrspace(4) dereferenceable(4) align(1) %ptr) {
%load = load half, ptr addrspace(4) %ptr, !invariant.load !0
%add = fadd half %load, 1.0
ret half %add
; CHECK: return $1{{$}}
; NOREGS-LABEL: no1:
; NOREGS: return{{$}}
-define i32 @no1(ptr %p, ptr dereferenceable(4) %q) {
+define i32 @no1(ptr %p, ptr dereferenceable(4) align(4) %q) {
%t = load volatile i32, ptr %q, !invariant.load !0
store volatile i32 0, ptr %p
ret i32 %t
; CHECK: return $pop{{[0-9]+}}{{$}}
; NOREGS-LABEL: yes0:
; NOREGS: return{{$}}
-define i32 @yes0(ptr %p, ptr dereferenceable(4) %q) {
+define i32 @yes0(ptr %p, ptr dereferenceable(4) align(4) %q) {
%t = load i32, ptr %q, !invariant.load !0
store i32 0, ptr %p
ret i32 %t
; NOREGS: call callee
; NOREGS: i32.load 0
; NOREGS: return
-define i32 @store_past_invar_load(i32 %a, ptr %p1, ptr dereferenceable(4) %p2) {
+define i32 @store_past_invar_load(i32 %a, ptr %p1, ptr dereferenceable(4) align(4) %p2) {
store i32 %a, ptr %p1
%b = load i32, ptr %p2, !invariant.load !0
call i32 @callee(i32 %a)
%0 = type { i64 }
%struct.S1 = type { i16, i32 }
-@g_10 = external dso_local global %struct.S1
+@g_10 = external dso_local global %struct.S1, align 8
declare void @func_28(i64, i64)
declare ptr @objc_msgSend(ptr, ptr, ...) nonlazybind
-define void @test_multi_def(ptr dereferenceable(8) %x1,
+define void @test_multi_def(ptr dereferenceable(8) align(8) %x1,
; CHECK-LABEL: test_multi_def:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: jl LBB4_2
; CHECK-NEXT: ## %bb.3: ## %exit
; CHECK-NEXT: retq
- ptr dereferenceable(8) %x2,
+ ptr dereferenceable(8) align(8) %x2,
ptr %y, i64 %count) nounwind nofree nosync {
entry:
br label %for.body
ret void
}
-define void @test_div_def(ptr dereferenceable(8) %x1,
+define void @test_div_def(ptr dereferenceable(8) align(8) %x1,
; CHECK-LABEL: test_div_def:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: jl LBB5_2
; CHECK-NEXT: ## %bb.3: ## %exit
; CHECK-NEXT: retq
- ptr dereferenceable(8) %x2,
+ ptr dereferenceable(8) align(8) %x2,
ptr %y, i32 %count) nounwind nofree nosync {
entry:
br label %for.body