Preserve aliasing info during memory intrinsics lowering
author Evgeniy Brevnov <ybrevnov@azul.com>
Tue, 25 Jan 2022 06:17:57 +0000 (13:17 +0700)
committer Evgeniy Brevnov <ybrevnov@azul.com>
Wed, 6 Apr 2022 04:33:54 +0000 (11:33 +0700)
By specification, the source and destination of llvm.memcpy.* must either be equal or non-overlapping. This semantics is hard or impossible to recover once the intrinsic has been lowered. This patch explicitly marks loads from the source and stores to the destination as non-aliasing when the source and destination are known to be unequal.
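
As a rough illustration of the scheme (sketch only; annotateCopyPair is a made-up helper, not part of the patch), the expansion creates a fresh anonymous alias scope, attaches it to each load from the source, and marks each store to the destination as not aliasing that scope:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    #include "llvm/IR/Metadata.h"

    using namespace llvm;

    // Annotate one load/store pair of the expanded copy loop, assuming the
    // source and destination were proven distinct.
    static void annotateCopyPair(LoadInst *Load, StoreInst *Store,
                                 LLVMContext &Ctx) {
      MDBuilder MDB(Ctx);
      MDNode *Domain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
      MDNode *Scope = MDB.createAnonymousAliasScope(Domain, "MemCopyAliasScope");
      // Loads from the source live in the new scope...
      Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, Scope));
      // ...and stores to the destination are declared not to alias it.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, Scope));
    }

With this metadata in place, later passes such as the loop vectorizer can reorder or widen the loads and stores without a runtime overlap check, which is what the updated unit tests below verify.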

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D118441

llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
llvm/unittests/Transforms/Utils/MemTransferLowering.cpp

index 8d09560..a46b7d4 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -21,6 +21,7 @@ class Instruction;
 class MemCpyInst;
 class MemMoveInst;
 class MemSetInst;
+class ScalarEvolution;
 class TargetTransformInfo;
 class Value;
 struct Align;
@@ -28,9 +29,9 @@ struct Align;
 /// Emit a loop implementing the semantics of llvm.memcpy where the size is not
 /// a compile-time constant. Loop will be inserted at \p InsertBefore.
 void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr,
-                                 Value *DstAddr, Value *CopyLen,
-                                 Align SrcAlign, Align DestAlign,
-                                 bool SrcIsVolatile, bool DstIsVolatile,
+                                 Value *DstAddr, Value *CopyLen, Align SrcAlign,
+                                 Align DestAlign, bool SrcIsVolatile,
+                                 bool DstIsVolatile, bool CanOverlap,
                                  const TargetTransformInfo &TTI);
 
 /// Emit a loop implementing the semantics of an llvm.memcpy whose size is a
@@ -39,10 +40,11 @@ void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                                Value *DstAddr, ConstantInt *CopyLen,
                                Align SrcAlign, Align DestAlign,
                                bool SrcIsVolatile, bool DstIsVolatile,
-                               const TargetTransformInfo &TTI);
+                               bool CanOverlap, const TargetTransformInfo &TTI);
 
 /// Expand \p MemCpy as a loop. \p MemCpy is not deleted.
-void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI);
+void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI,
+                        ScalarEvolution *SE = nullptr);
 
 /// Expand \p MemMove as a loop. \p MemMove is not deleted.
 void expandMemMoveAsLoop(MemMoveInst *MemMove);
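
For context, a hypothetical caller could use the extended entry point as follows (sketch only; lowerOneMemCpy is not part of the patch, but the expandMemCpyAsLoop signature matches the header change above):

    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

    using namespace llvm;

    static void lowerOneMemCpy(MemCpyInst *MC, const TargetTransformInfo &TTI,
                               ScalarEvolution *SE) {
      // SE may be null; the expansion then conservatively assumes the operands
      // can overlap and emits no alias-scope metadata.
      expandMemCpyAsLoop(MC, TTI, SE);
      MC->eraseFromParent();
    }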
index f655f25..f57c292 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -115,7 +115,8 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
                               /* SrcAlign */ LI->getAlign(),
                               /* DestAlign */ SI->getAlign(),
                               /* SrcIsVolatile */ LI->isVolatile(),
-                              /* DstIsVolatile */ SI->isVolatile(), TTI);
+                              /* DstIsVolatile */ SI->isVolatile(),
+                              /* CanOverlap */ true, TTI);
 
     SI->eraseFromParent();
     LI->eraseFromParent();
index c43cb7d..3848e34 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -7,9 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
@@ -18,6 +20,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                                      Value *DstAddr, ConstantInt *CopyLen,
                                      Align SrcAlign, Align DstAlign,
                                      bool SrcIsVolatile, bool DstIsVolatile,
+                                     bool CanOverlap,
                                      const TargetTransformInfo &TTI) {
   // No need to expand zero length copies.
   if (CopyLen->isZero())
@@ -28,6 +31,10 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
   Function *ParentFunc = PreLoopBB->getParent();
   LLVMContext &Ctx = PreLoopBB->getContext();
   const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
+  MDBuilder MDB(Ctx);
+  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
+  StringRef Name = "MemCopyAliasScope";
+  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
 
   unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
   unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
@@ -68,12 +75,21 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
     // Loop Body
     Value *SrcGEP =
         LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
-    Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
-                                                PartSrcAlign, SrcIsVolatile);
+    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
+                                                   PartSrcAlign, SrcIsVolatile);
+    if (!CanOverlap) {
+      // Set alias scope for loads.
+      Load->setMetadata(LLVMContext::MD_alias_scope,
+                        MDNode::get(Ctx, NewScope));
+    }
     Value *DstGEP =
         LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
-    LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+    StoreInst *Store = LoopBuilder.CreateAlignedStore(
+        Load, DstGEP, PartDstAlign, DstIsVolatile);
+    if (!CanOverlap) {
+      // Indicate that stores don't overlap loads.
+      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+    }
     Value *NewIndex =
         LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
     LoopIndex->addIncoming(NewIndex, LoopBB);
@@ -111,9 +127,13 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                              : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
       Value *SrcGEP = RBuilder.CreateInBoundsGEP(
           OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
-      Value *Load =
+      LoadInst *Load =
           RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
-
+      if (!CanOverlap) {
+        // Set alias scope for loads.
+        Load->setMetadata(LLVMContext::MD_alias_scope,
+                          MDNode::get(Ctx, NewScope));
+      }
       // Cast destination to operand type and store.
       PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
       Value *CastedDst = DstAddr->getType() == DstPtrType
@@ -121,8 +141,12 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                              : RBuilder.CreateBitCast(DstAddr, DstPtrType);
       Value *DstGEP = RBuilder.CreateInBoundsGEP(
           OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
-      RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
+                                                     DstIsVolatile);
+      if (!CanOverlap) {
+        // Indicate that stores don't overlap loads.
+        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+      }
       BytesCopied += OperandSize;
     }
   }
@@ -134,7 +158,7 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
                                        Value *SrcAddr, Value *DstAddr,
                                        Value *CopyLen, Align SrcAlign,
                                        Align DstAlign, bool SrcIsVolatile,
-                                       bool DstIsVolatile,
+                                       bool DstIsVolatile, bool CanOverlap,
                                        const TargetTransformInfo &TTI) {
   BasicBlock *PreLoopBB = InsertBefore->getParent();
   BasicBlock *PostLoopBB =
@@ -143,6 +167,11 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
   Function *ParentFunc = PreLoopBB->getParent();
   const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
   LLVMContext &Ctx = PreLoopBB->getContext();
+  MDBuilder MDB(Ctx);
+  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
+  StringRef Name = "MemCopyAliasScope";
+  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+
   unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
   unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
 
@@ -183,11 +212,19 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
   LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
 
   Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
-  Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, PartSrcAlign,
-                                              SrcIsVolatile);
+  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
+                                                 PartSrcAlign, SrcIsVolatile);
+  if (!CanOverlap) {
+    // Set alias scope for loads.
+    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
+  }
   Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
-  LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+  StoreInst *Store =
+      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
+  if (!CanOverlap) {
+    // Indicate that stores don't overlap loads.
+    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+  }
   Value *NewIndex =
       LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
   LoopIndex->addIncoming(NewIndex, LoopBB);
@@ -237,12 +274,21 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
     Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
     Value *SrcGEP =
         ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
-    Value *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP, PartSrcAlign,
-                                               SrcIsVolatile);
+    LoadInst *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP,
+                                                  PartSrcAlign, SrcIsVolatile);
+    if (!CanOverlap) {
+      // Set alias scope for loads.
+      Load->setMetadata(LLVMContext::MD_alias_scope,
+                        MDNode::get(Ctx, NewScope));
+    }
     Value *DstGEP =
         ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
-    ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
+                                                     DstIsVolatile);
+    if (!CanOverlap) {
+      // Indicate that stores don't overlap loads.
+      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+    }
     Value *ResNewIndex =
         ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U));
     ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
@@ -426,7 +472,16 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
 }
 
 void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
-                              const TargetTransformInfo &TTI) {
+                              const TargetTransformInfo &TTI,
+                              ScalarEvolution *SE) {
+  bool CanOverlap = true;
+  if (SE) {
+    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
+    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
+    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
+      CanOverlap = false;
+  }
+
   if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
     createMemCpyLoopKnownSize(
         /* InsertBefore */ Memcpy,
@@ -437,6 +492,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
         /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
         /* SrcIsVolatile */ Memcpy->isVolatile(),
         /* DstIsVolatile */ Memcpy->isVolatile(),
+        /* CanOverlap */ CanOverlap,
         /* TargetTransformInfo */ TTI);
   } else {
     createMemCpyLoopUnknownSize(
@@ -448,6 +504,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
         /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
         /* SrcIsVolatile */ Memcpy->isVolatile(),
         /* DstIsVolatile */ Memcpy->isVolatile(),
+        /* CanOverlap */ CanOverlap,
         /* TargetTransformInfo */ TTI);
   }
 }
index 3d515fb..df86e16 100644
--- a/llvm/unittests/Transforms/Utils/MemTransferLowering.cpp
+++ b/llvm/unittests/Transforms/Utils/MemTransferLowering.cpp
@@ -119,19 +119,15 @@ TEST_F(MemTransferLowerTest, MemCpyKnownLength) {
         auto *MemCpyBB = getBasicBlockByName(F, "memcpy");
         Instruction *Inst = &MemCpyBB->front();
         MemCpyInst *MemCpyI = cast<MemCpyInst>(Inst);
-        expandMemCpyAsLoop(MemCpyI, TTI);
+        auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
+        expandMemCpyAsLoop(MemCpyI, TTI, &SE);
         auto *CopyLoopBB = getBasicBlockByName(F, "load-store-loop");
         Instruction *LoadInst =
             getInstructionByOpcode(*CopyLoopBB, Instruction::Load, 1);
-        EXPECT_NONFATAL_FAILURE(
-            EXPECT_NE(LoadInst->getMetadata(LLVMContext::MD_alias_scope),
-                      nullptr),
-            "");
+        EXPECT_NE(nullptr, LoadInst->getMetadata(LLVMContext::MD_alias_scope));
         Instruction *StoreInst =
             getInstructionByOpcode(*CopyLoopBB, Instruction::Store, 1);
-        EXPECT_NONFATAL_FAILURE(
-            EXPECT_NE(StoreInst->getMetadata(LLVMContext::MD_noalias), nullptr),
-            "");
+        EXPECT_NE(nullptr, StoreInst->getMetadata(LLVMContext::MD_noalias));
         return PreservedAnalyses::none();
       }));
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
@@ -163,14 +159,15 @@ TEST_F(MemTransferLowerTest, VecMemCpyKnownLength) {
         auto *MemCpyBB = getBasicBlockByName(F, "memcpy");
         Instruction *Inst = &MemCpyBB->front();
         MemCpyInst *MemCpyI = cast<MemCpyInst>(Inst);
-        expandMemCpyAsLoop(MemCpyI, TTI);
+        auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
+        expandMemCpyAsLoop(MemCpyI, TTI, &SE);
         return PreservedAnalyses::none();
       }));
   FPM.addPass(LoopVectorizePass(LoopVectorizeOptions()));
   FPM.addPass(ForwardingPass(
       [=](Function &F, FunctionAnalysisManager &FAM) -> PreservedAnalyses {
         auto *TargetBB = getBasicBlockByName(F, "vector.body");
-        EXPECT_NONFATAL_FAILURE(EXPECT_NE(TargetBB, nullptr), "");
+        EXPECT_NE(nullptr, TargetBB);
         return PreservedAnalyses::all();
       }));
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));