From 32b6c17b29079e7d2ac61cdc90b10983ee97d78d Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Tue, 23 Nov 2021 09:47:56 +0000
Subject: [PATCH] [SDAG] Use UnknownSize for masked load/store MMO size

A masked load or store will load or store a number of bytes from a memory
location, and that number is not generally known at compile time. They do
not necessarily load/store the entire vector width, and treating them as
such can lead to incorrect aliasing information (for example, if the
underlying object is smaller than the size of the vector).

This makes sure that the MMO is given an unknown size to represent this,
which is less accurate than "may load/store up to 16 bytes", but less
incorrect than "will load/store exactly 16 bytes". An illustrative IR
sketch of the hazard is appended in a note after the diff.

Differential Revision: https://reviews.llvm.org/D113888
---
 llvm/include/llvm/CodeGen/MachineFunction.h        |  3 ++-
 .../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp   | 21 +++++++++------------
 .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp   | 16 +++------------
 llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 11 +++++++----
 llvm/test/CodeGen/Thumb2/mve-masked-store-mmo.ll   |  8 ++++----
 llvm/test/CodeGen/X86/masked_compressstore.ll      |  8 ++++----
 llvm/test/CodeGen/X86/vmaskmov-offset.ll           | 16 ++++++++--------
 7 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index dcbd19a..ec23dde 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -938,7 +938,8 @@ public:
                                            int64_t Offset, LLT Ty);
   MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
                                           int64_t Offset, uint64_t Size) {
-    return getMachineMemOperand(MMO, Offset, LLT::scalar(8 * Size));
+    return getMachineMemOperand(
+        MMO, Offset, Size == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * Size));
   }
 
   /// getMachineMemOperand - Allocate a new MachineMemOperand by copying
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 539c9cb..7ec2638 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1820,10 +1820,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
   else
     std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
 
-  unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoSize, Alignment,
-      MLD->getAAInfo(), MLD->getRanges());
+      MLD->getPointerInfo(), MachineMemOperand::MOLoad,
+      MemoryLocation::UnknownSize, Alignment, MLD->getAAInfo(),
+      MLD->getRanges());
 
   Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
                          MMO, MLD->getAddressingMode(), ExtType,
@@ -1837,7 +1837,6 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
 
   // Generate hi masked load.
   Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
                                    MLD->isExpandingLoad());
-  unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
 
   MachinePointerInfo MPI;
   if (LoMemVT.isScalableVector())
@@ -1847,8 +1846,8 @@
         LoMemVT.getStoreSize().getFixedSize());
 
   MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MPI, MachineMemOperand::MOLoad, HiSize, Alignment, MLD->getAAInfo(),
-      MLD->getRanges());
+      MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+      MLD->getAAInfo(), MLD->getRanges());
 
   Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT,
                          MMO, MLD->getAddressingMode(), ExtType,
@@ -2662,10 +2661,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
       DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
 
   SDValue Lo, Hi, Res;
-  unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment,
-      N->getAAInfo(), N->getRanges());
+      N->getPointerInfo(), MachineMemOperand::MOStore,
+      MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
 
   Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
                           N->getAddressingMode(), N->isTruncatingStore(),
@@ -2689,10 +2687,9 @@
     MPI = N->getPointerInfo().getWithOffset(
         LoMemVT.getStoreSize().getFixedSize());
 
-  unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
   MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(),
-      N->getRanges());
+      MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+      N->getAAInfo(), N->getRanges());
 
   Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
                           N->getAddressingMode(), N->isTruncatingStore(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5d911c1..75fa8a3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4336,9 +4336,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
 
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
-      // TODO: Make MachineMemOperands aware of scalable
-      // vectors.
-      VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata());
+      MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
   SDValue StoreNode =
       DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
                          ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4496,22 +4494,14 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
   const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
 
   // Do not serialize masked loads of constant memory with anything.
-  MemoryLocation ML;
-  if (VT.isScalableVector())
-    ML = MemoryLocation::getAfter(PtrOperand);
-  else
-    ML = MemoryLocation(PtrOperand, LocationSize::precise(
-                            DAG.getDataLayout().getTypeStoreSize(I.getType())),
-                        AAInfo);
+  MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
   bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
   SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
 
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
-      // TODO: Make MachineMemOperands aware of scalable
-      // vectors.
-      VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
+      MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
 
   SDValue Load =
       DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 8900fca..f7237f4 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -9,6 +9,7 @@
 #include "HexagonISelLowering.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/IR/IntrinsicsHexagon.h"
 #include "llvm/Support/CommandLine.h"
@@ -1846,16 +1847,18 @@ HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
   SDValue Chain = MemN->getChain();
   SDValue Base0 = MemN->getBasePtr();
   SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);
+  unsigned MemOpc = MemN->getOpcode();
 
   MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
   if (MachineMemOperand *MMO = MemN->getMemOperand()) {
     MachineFunction &MF = DAG.getMachineFunction();
-    MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen);
-    MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen);
+    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
+                           ? (uint64_t)MemoryLocation::UnknownSize
+                           : HwLen;
+    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
+    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
   }
 
-  unsigned MemOpc = MemN->getOpcode();
-
   if (MemOpc == ISD::LOAD) {
     assert(cast<LoadSDNode>(Op)->isUnindexed());
     SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-store-mmo.ll b/llvm/test/CodeGen/Thumb2/mve-masked-store-mmo.ll
index 0e5b005..c0f1cbf 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-store-mmo.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-store-mmo.ll
@@ -11,15 +11,15 @@ define i32 @incorrectmmo() {
 ; CHECK-NEXT:    adr r0, .LCPI0_0
 ; CHECK-NEXT:    vldrw.u32 q0, [r0]
 ; CHECK-NEXT:    add.w r0, sp, #2
-; CHECK-NEXT:    ldrb.w r1, [sp, #3]
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vstrbt.8 q0, [r0]
-; CHECK-NEXT:    ldrb.w r2, [sp, #4]
 ; CHECK-NEXT:    ldrb.w r0, [sp, #2]
-; CHECK-NEXT:    ldrb.w r3, [sp, #10]
+; CHECK-NEXT:    ldrb.w r1, [sp, #3]
+; CHECK-NEXT:    ldrb.w r2, [sp, #4]
 ; CHECK-NEXT:    add r0, r1
-; CHECK-NEXT:    ldrb.w r1, [sp, #11]
+; CHECK-NEXT:    ldrb.w r3, [sp, #10]
 ; CHECK-NEXT:    add r0, r2
+; CHECK-NEXT:    ldrb.w r1, [sp, #11]
 ; CHECK-NEXT:    add r0, r3
 ; CHECK-NEXT:    add r0, r1
 ; CHECK-NEXT:    add sp, #12
diff --git a/llvm/test/CodeGen/X86/masked_compressstore.ll b/llvm/test/CodeGen/X86/masked_compressstore.ll
index e353367..9bc7335 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore.ll
@@ -516,8 +516,6 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i
 ; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm2
 ; AVX512F-NEXT:    vpslld $31, %zmm2, %zmm2
 ; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k1
-; AVX512F-NEXT:    kshiftrw $8, %k1, %k2
-; AVX512F-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    movzbl %al, %eax
 ; AVX512F-NEXT:    movl %eax, %ecx
@@ -535,7 +533,9 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i
 ; AVX512F-NEXT:    andl $252645135, %ecx ## imm = 0xF0F0F0F
 ; AVX512F-NEXT:    imull $16843009, %ecx, %eax ## imm = 0x1010101
 ; AVX512F-NEXT:    shrl $24, %eax
+; AVX512F-NEXT:    kshiftrw $8, %k1, %k2
 ; AVX512F-NEXT:    vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
+; AVX512F-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
@@ -570,8 +570,6 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i
 ; AVX512VLBW:       ## %bb.0:
 ; AVX512VLBW-NEXT:    vpsllw $7, %xmm2, %xmm2
 ; AVX512VLBW-NEXT:    vpmovb2m %xmm2, %k1
-; AVX512VLBW-NEXT:    kshiftrw $8, %k1, %k2
-; AVX512VLBW-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
 ; AVX512VLBW-NEXT:    kmovd %k1, %eax
 ; AVX512VLBW-NEXT:    movzbl %al, %eax
 ; AVX512VLBW-NEXT:    movl %eax, %ecx
@@ -589,7 +587,9 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i
 ; AVX512VLBW-NEXT:    andl $252645135, %ecx ## imm = 0xF0F0F0F
 ; AVX512VLBW-NEXT:    imull $16843009, %ecx, %eax ## imm = 0x1010101
 ; AVX512VLBW-NEXT:    shrl $24, %eax
+; AVX512VLBW-NEXT:    kshiftrw $8, %k1, %k2
 ; AVX512VLBW-NEXT:    vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
+; AVX512VLBW-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
 ; AVX512VLBW-NEXT:    vzeroupper
 ; AVX512VLBW-NEXT:    retq
   call void @llvm.masked.compressstore.v16f64(<16 x double> %V, double* %base, <16 x i1> %mask)
diff --git a/llvm/test/CodeGen/X86/vmaskmov-offset.ll b/llvm/test/CodeGen/X86/vmaskmov-offset.ll
index 42ee66d..2682fd3 100644
--- a/llvm/test/CodeGen/X86/vmaskmov-offset.ll
+++ b/llvm/test/CodeGen/X86/vmaskmov-offset.ll
@@ -14,10 +14,10 @@ define void @test_v16f(<16 x i32> %x) {
   ; CHECK-NEXT:   [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
   ; CHECK-NEXT:   [[VPCMPEQDYrr:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY]], [[AVX_SET0_]]
   ; CHECK-NEXT:   [[VPCMPEQDYrr1:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY1]], [[AVX_SET0_]]
-  ; CHECK-NEXT:   [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load (s256) from %ir.stack_input_vec, align 4)
-  ; CHECK-NEXT:   [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load (s256) from %ir.stack_input_vec + 32, align 4)
-  ; CHECK-NEXT:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQDYrr]], killed [[VMASKMOVPSYrm1]] :: (store (s256) into %ir.stack_output_vec + 32, align 4)
-  ; CHECK-NEXT:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQDYrr1]], killed [[VMASKMOVPSYrm]] :: (store (s256) into %ir.stack_output_vec, align 4)
+  ; CHECK-NEXT:   [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load unknown-size from %ir.stack_input_vec, align 4)
+  ; CHECK-NEXT:   [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load unknown-size from %ir.stack_input_vec + 32, align 4)
+  ; CHECK-NEXT:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQDYrr]], killed [[VMASKMOVPSYrm1]] :: (store unknown-size into %ir.stack_output_vec + 32, align 4)
+  ; CHECK-NEXT:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQDYrr1]], killed [[VMASKMOVPSYrm]] :: (store unknown-size into %ir.stack_output_vec, align 4)
   ; CHECK-NEXT:   RET 0
 bb:
   %stack_input_vec = alloca <16 x float>, align 64
@@ -41,10 +41,10 @@ define void @test_v8d(<8 x i64> %x) {
   ; CHECK-NEXT:   [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
   ; CHECK-NEXT:   [[VPCMPEQQYrr:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY]], [[AVX_SET0_]]
   ; CHECK-NEXT:   [[VPCMPEQQYrr1:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY1]], [[AVX_SET0_]]
-  ; CHECK-NEXT:   [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load (s256) from %ir.stack_input_vec, align 4)
-  ; CHECK-NEXT:   [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load (s256) from %ir.stack_input_vec + 32, align 4)
-  ; CHECK-NEXT:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQQYrr]], killed [[VMASKMOVPDYrm1]] :: (store (s256) into %ir.stack_output_vec + 32, align 4)
-  ; CHECK-NEXT:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQQYrr1]], killed [[VMASKMOVPDYrm]] :: (store (s256) into %ir.stack_output_vec, align 4)
+  ; CHECK-NEXT:   [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load unknown-size from %ir.stack_input_vec, align 4)
+  ; CHECK-NEXT:   [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load unknown-size from %ir.stack_input_vec + 32, align 4)
+  ; CHECK-NEXT:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQQYrr]], killed [[VMASKMOVPDYrm1]] :: (store unknown-size into %ir.stack_output_vec + 32, align 4)
+  ; CHECK-NEXT:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQQYrr1]], killed [[VMASKMOVPDYrm]] :: (store unknown-size into %ir.stack_output_vec, align 4)
   ; CHECK-NEXT:   RET 0
 bb:
   %stack_input_vec = alloca <8 x double>, align 64
-- 
2.7.4
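
Note: the following is an illustrative sketch, not part of the committed
patch. It makes the hazard from the commit message concrete, in the spirit of
the mve-masked-store-mmo.ll test above; the function name and values are
hypothetical. The masked store's vector type is 16 bytes wide but the
underlying alloca is only 12 bytes, so the store cannot legally write all 16
bytes. An MMO claiming a fixed 16-byte size would tell alias analysis that
the store definitely covers the trailing scalar load, whereas an unknown size
forces conservative ordering (this is why, in the Thumb2 diff above, the byte
loads are no longer scheduled across the masked store). Relatedly,
MemoryLocation::UnknownSize is ~UINT64_C(0), which is why the
MachineFunction.h hunk maps that sentinel to a default-constructed (invalid)
LLT instead of calling LLT::scalar(8 * Size) on it.

; Illustrative sketch only. The masked store may write anywhere from 0 to 16
; bytes starting at %p, depending on %mask. Lanes 10..15 lie beyond the
; 12-byte object and must be masked off by the caller for the call to be well
; defined, so the access size is genuinely unknown at compile time.
define i8 @mmo_size_sketch(<16 x i8> %v, <16 x i1> %mask) {
entry:
  %obj = alloca [12 x i8], align 4
  %p = getelementptr inbounds [12 x i8], [12 x i8]* %obj, i32 0, i32 2
  %vp = bitcast i8* %p to <16 x i8>*
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v, <16 x i8>* %vp, i32 1, <16 x i1> %mask)
  ; Whether this load observes lane 9 of the store depends on the mask, so
  ; the store's MMO must not claim that it definitely writes 16 bytes.
  %q = getelementptr inbounds [12 x i8], [12 x i8]* %obj, i32 0, i32 11
  %r = load i8, i8* %q, align 1
  ret i8 %r
}

declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)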