Update the memcpy rewriting to fully support widened int rewriting.

author Chandler Carruth <chandlerc@gmail.com>

Mon, 15 Oct 2012 10:24:43 +0000 (10:24 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Mon, 15 Oct 2012 10:24:43 +0000 (10:24 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Mon, 15 Oct 2012 10:24:43 +0000 (10:24 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Mon, 15 Oct 2012 10:24:43 +0000 (10:24 +0000)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp

index 419756d..7d2ce09 100644 (file)
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2717,9 +2717,9 @@ private:
      // If this doesn't map cleanly onto the alloca type, and that type isn't
      // a single value type, just emit a memcpy.
      bool EmitMemCpy
-      = !VecTy && (BeginOffset != NewAllocaBeginOffset ||
-                   EndOffset != NewAllocaEndOffset ||
-                   !NewAI.getAllocatedType()->isSingleValueType());
+      = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset ||
+                             EndOffset != NewAllocaEndOffset ||
+                             !NewAI.getAllocatedType()->isSingleValueType());
  
      // If we're just going to emit a memcpy, the alloca hasn't changed, and the
      // size hasn't been shrunk based on analysis of the viable range, this is
@@ -2741,14 +2741,23 @@ private:
      if (Pass.DeadSplitInsts.insert(&II))
        Pass.DeadInsts.push_back(&II);
  
-    bool IsVectorElement = VecTy && (BeginOffset > NewAllocaBeginOffset ||
-                                     EndOffset < NewAllocaEndOffset);
+    bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+                         EndOffset == NewAllocaEndOffset;
+    bool IsVectorElement = VecTy && !IsWholeAlloca;
+    uint64_t Size = EndOffset - BeginOffset;
+    IntegerType *SubIntTy
+      = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
  
      Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
                                : II.getRawDest()->getType();
-    if (!EmitMemCpy)
-      OtherPtrTy = IsVectorElement ? VecTy->getElementType()->getPointerTo()
-                                   : NewAI.getType();
+    if (!EmitMemCpy) {
+      if (IsVectorElement)
+        OtherPtrTy = VecTy->getElementType()->getPointerTo();
+      else if (IntTy && !IsWholeAlloca)
+        OtherPtrTy = SubIntTy->getPointerTo();
+      else
+        OtherPtrTy = NewAI.getType();
+    }
  
      // Compute the other pointer, folding as much as possible to produce
      // a single, simple GEP in most cases.
@@ -2795,11 +2804,20 @@ private:
          IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
          getIndex(IRB, BeginOffset),
          getName(".copyextract"));
+    } else if (IntTy && !IsWholeAlloca && !IsDest) {
+      Src = extractInteger(IRB, SubIntTy, BeginOffset);
      } else {
        Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
                                    getName(".copyload"));
      }
  
+    if (IntTy && !IsWholeAlloca && IsDest) {
+      StoreInst *Store = insertInteger(IRB, Src, BeginOffset);
+      (void)Store;
+      DEBUG(dbgs() << "          to: " << *Store << "\n");
+      return true;
+    }
+
      if (IsVectorElement && IsDest) {
        // We have to insert into a loaded copy before storing.
        Src = IRB.CreateInsertElement(
diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll

index b33ffa6..644fda1 100644 (file)
--- a/llvm/test/Transforms/SROA/basictest.ll
+++ b/llvm/test/Transforms/SROA/basictest.ll
@@ -1046,8 +1046,12 @@ entry:
    ; Or a memset of the whole thing.
    call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false)
  
-  ; Store to the high 32-bits...
+  ; Write to the high 32-bits with a memcpy.
    %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8* %0, i32 4
+  %d.raw = bitcast double* %d to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i32 1, i1 false)
+
+  ; Store to the high 32-bits...
    %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
    store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
author	Chandler Carruth <chandlerc@gmail.com>
	Mon, 15 Oct 2012 10:24:43 +0000 (10:24 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Mon, 15 Oct 2012 10:24:43 +0000 (10:24 +0000)
llvm/lib/Transforms/Scalar/SROA.cpp		patch | blob | history
llvm/test/Transforms/SROA/basictest.ll		patch | blob | history