[MemCpyOpt] Teach memcpyopt to handle loads from the constant memory.

author Michael Liao <michael.hliao@gmail.com>

Thu, 5 Aug 2021 20:48:49 +0000 (16:48 -0400)

committer Michael Liao <michael.hliao@gmail.com>

Fri, 6 Aug 2021 16:43:52 +0000 (12:43 -0400)
author Michael Liao <michael.hliao@gmail.com>
Thu, 5 Aug 2021 20:48:49 +0000 (16:48 -0400)
committer Michael Liao <michael.hliao@gmail.com>
Fri, 6 Aug 2021 16:43:52 +0000 (12:43 -0400)
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

index 6ad0417..0dd0b45 100644 (file)
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -708,9 +708,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
          if (P) {
            // If we load from memory that may alias the memory we store to,
            // memmove must be used to preserve semantic. If not, memcpy can
-          // be used.
+          // be used. Also, if we load from constant memory, memcpy can be used
+          // as the constant memory won't be modified.
            bool UseMemMove = false;
-          if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc))
+          if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
              UseMemMove = true;
  
            uint64_t Size = DL.getTypeStoreSize(T);
@@ -1102,11 +1103,12 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
    }
  
    // If the dest of the second might alias the source of the first, then the
-  // source and dest might overlap.  We still want to eliminate the intermediate
-  // value, but we have to generate a memmove instead of memcpy.
+  // source and dest might overlap. However, if the source of the first points
+  // to constant memory, they won't overlap by definition. Otherwise, we
+  // still want to eliminate the intermediate value, but we have to generate a
+  // memmove instead of memcpy.
    bool UseMemMove = false;
-  if (!AA->isNoAlias(MemoryLocation::getForDest(M),
-                     MemoryLocation::getForSource(MDep)))
+  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
      UseMemMove = true;
  
    // If all checks passed, then we can transform M.
@@ -1168,10 +1170,7 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
  
    // Check that src and dst of the memcpy aren't the same. While memcpy
    // operands cannot partially overlap, exact equality is allowed.
-  if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(),
-                                    LocationSize::precise(1)),
-                     MemoryLocation(MemCpy->getDest(),
-                                    LocationSize::precise(1))))
+  if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
      return false;
  
    if (EnableMemorySSA) {
@@ -1560,9 +1559,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
  /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
  /// not to alias.
  bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
-  // See if the pointers alias.
-  if (!AA->isNoAlias(MemoryLocation::getForDest(M),
-                     MemoryLocation::getForSource(M)))
+  // See if this memmove could potentially modify its source.
+  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
      return false;
  
    LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
diff --git a/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll

index a506b6c..a803679 100644 (file)
--- a/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll
@@ -4,6 +4,9 @@
  
  %T = type { i8, i32 }
  
+; A global constant of %T
+@C = external constant %T
+
  ; Ensure load-store forwarding of an aggregate is interpreted as
  ; a memmove when the source and dest may alias
  define void @test_memmove(%T* align 8 %a, %T* align 16 %b) {
@@ -32,6 +35,17 @@ define void @test_memcpy(%T* noalias align 8 %a, %T* noalias align 16 %b) {
    ret void
  }
  
+define void @test_memcpy_constant(%T* %d) {
+; CHECK-LABEL: @test_memcpy_constant(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast %T* [[D:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP1]], i8* align 8 getelementptr inbounds ([[T:%.*]], %T* @C, i32 0, i32 0), i64 8, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %val = load %T, %T* @C, align 8
+  store %T %val, %T* %d, align 16
+  ret void
+}
+
  ; memcpy(%d, %a) should not be generated since store2 may-aliases load %a.
  define void @f(%T* %a, %T* %b, %T* %c, %T* %d) {
  ; CHECK-LABEL: @f(
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll

index 993e702..3ca9caf 100644 (file)
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -8,6 +8,8 @@ target triple = "i686-apple-darwin9"
  %0 = type { x86_fp80, x86_fp80 }
  %1 = type { i32, i32 }
  
+@C = external constant [0 x i8]
+
  declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture, i8* nocapture, i64, i1) nounwind
  declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
  declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
@@ -22,7 +24,7 @@ define void @test1(%0* sret(%0)  %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) noun
  ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [[TMP0:%.*]], align 16
  ; CHECK-NEXT:    [[MEMTMP:%.*]] = alloca [[TMP0]], align 16
  ; CHECK-NEXT:    [[TMP5:%.*]] = fsub x86_fp80 0xK80000000000000000000, [[Z_1:%.*]]
-; CHECK-NEXT:    call void @ccoshl(%0* sret([[TMP0]]) [[TMP2]], x86_fp80 [[TMP5]], x86_fp80 [[Z_0:%.*]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    call void @ccoshl(%0* sret([[TMP0]]) [[TMP2]], x86_fp80 [[TMP5]], x86_fp80 [[Z_0:%.*]]) #[[ATTR2:[0-9]+]]
  ; CHECK-NEXT:    [[TMP219:%.*]] = bitcast %0* [[TMP2]] to i8*
  ; CHECK-NEXT:    [[MEMTMP20:%.*]] = bitcast %0* [[MEMTMP]] to i8*
  ; CHECK-NEXT:    [[AGG_RESULT21:%.*]] = bitcast %0* [[AGG_RESULT:%.*]] to i8*
@@ -66,6 +68,23 @@ define void @test2(i8* %P, i8* %Q) nounwind  {
  
  ; The intermediate alloca and one of the memcpy's should be eliminated, the
  ; other should be related with a memcpy.
+define void @test2_constant(i8* %Q) nounwind  {
+; CHECK-LABEL: @test2_constant(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @C, i64 0, i64 0
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[P]], i32 32, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %memtmp = alloca %0, align 16
+  %R = bitcast %0* %memtmp to i8*
+  %P = getelementptr inbounds [0 x i8], [0 x i8]* @C, i64 0, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false)
+  ret void
+
+}
+
+; The intermediate alloca and one of the memcpy's should be eliminated, the
+; other should be replaced with a memcpy.
  define void @test2_memcpy(i8* noalias %P, i8* noalias %Q) nounwind  {
  ; CHECK-LABEL: @test2_memcpy(
  ; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[P:%.*]], i32 32, i1 false)
@@ -299,7 +318,7 @@ define void @test6(i8 *%P) {
  define i32 @test7(%struct.p* nocapture align 8 byval(%struct.p) %q) nounwind ssp {
  ; CHECK-LABEL: @test7(
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @g(%struct.p* byval([[STRUCT_P:%.*]]) align 8 [[Q:%.*]]) #[[ATTR0]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @g(%struct.p* byval([[STRUCT_P:%.*]]) align 8 [[Q:%.*]]) #[[ATTR2]]
  ; CHECK-NEXT:    ret i32 [[CALL]]
  ;
  entry:
diff --git a/llvm/test/Transforms/MemCpyOpt/memmove.ll b/llvm/test/Transforms/MemCpyOpt/memmove.ll

index f3f1817..2f86b08 100644 (file)
--- a/llvm/test/Transforms/MemCpyOpt/memmove.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memmove.ll
@@ -6,6 +6,8 @@
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
  target triple = "x86_64-apple-darwin9.0"
  
+@C = external constant [0 x i8]
+
  declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
  
  define i8* @test1(i8* nocapture %src) nounwind {
@@ -54,3 +56,14 @@ entry:
    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i1 false)
    ret void
  }
+
+define void @test4(i8* %P) nounwind {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @C, i64 0, i64 0
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[ADD_PTR]], i64 17, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %add.ptr = getelementptr inbounds [0 x i8], [0 x i8]* @C, i64 0, i64 0
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i1 false)
+  ret void
+}
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll

index c1255f5..460d317 100644 (file)
--- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
@@ -4,6 +4,25 @@
  
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  
+@C = external constant [0 x i8]
+
+define void @test_constant(i64 %src_size, i8* %dst, i64 %dst_size, i8 %c) {
+; CHECK-LABEL: @test_constant(
+; CHECK-NEXT:    [[SRC:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @C, i64 0, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[SRC_SIZE:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[DST_SIZE]], [[SRC_SIZE]]
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 [[SRC_SIZE]]
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP4]], i8 [[C:%.*]], i64 [[TMP3]], i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* [[SRC]], i64 [[SRC_SIZE]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i1 false)
+  %src = getelementptr inbounds [0 x i8], [0 x i8]* @C, i64 0, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false)
+  ret void
+}
+
  define void @test(i8* %src, i64 %src_size, i8* noalias %dst, i64 %dst_size, i8 %c) {
  ; CHECK-LABEL: @test(
  ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[SRC_SIZE:%.*]]
author	Michael Liao <michael.hliao@gmail.com>
	Thu, 5 Aug 2021 20:48:49 +0000 (16:48 -0400)
committer	Michael Liao <michael.hliao@gmail.com>
	Fri, 6 Aug 2021 16:43:52 +0000 (12:43 -0400)
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp		patch \| blob \| history
llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll		patch \| blob \| history
llvm/test/Transforms/MemCpyOpt/memcpy.ll		patch \| blob \| history
llvm/test/Transforms/MemCpyOpt/memmove.ll		patch \| blob \| history
llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll		patch \| blob \| history