Teach SROA to cope with wrapper aggregates. These show up a lot in ABI

author Chandler Carruth <chandlerc@gmail.com>

Sat, 13 Oct 2012 10:49:33 +0000 (10:49 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Sat, 13 Oct 2012 10:49:33 +0000 (10:49 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Sat, 13 Oct 2012 10:49:33 +0000 (10:49 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Sat, 13 Oct 2012 10:49:33 +0000 (10:49 +0000)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp

index cb6da3b..d6ddaae 100644 (file)
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3049,6 +3049,36 @@ private:
  };
  }
  
+/// \brief Strip no-op aggregate wrappers from \p Ty, using \p DL for sizes.
+///
+/// Peels arrays (via their element type) and structs (via the element that
+/// contains offset 0) recursively, stopping at a single-value type or as soon
+/// as a layer's alloc size or bit size exceeds that of its inner type.
+static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
+  if (Ty->isSingleValueType())
+    return Ty; // Nothing to strip from a single-value type.
+
+  uint64_t AllocSize = DL.getTypeAllocSize(Ty); // Outer alloc size.
+  uint64_t TypeSize = DL.getTypeSizeInBits(Ty); // Outer size in bits.
+
+  Type *InnerTy; // Set by each branch below that does not return early.
+  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
+    InnerTy = ArrTy->getElementType();
+  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+    const StructLayout *SL = DL.getStructLayout(STy);
+    unsigned Index = SL->getElementContainingOffset(0); // Element at offset 0.
+    InnerTy = STy->getElementType(Index);
+  } else {
+    return Ty; // Neither an array nor a struct: leave the type untouched.
+  }
+  // NOTE(review): '>' (not '!=') also strips when Ty is smaller; confirm.
+  if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
+      TypeSize > DL.getTypeSizeInBits(InnerTy))
+    return Ty; // Outer type is larger than its inner type: keep this layer.
+
+  return stripAggregateTypeWrapping(DL, InnerTy); // Try the next layer.
+}
+
  /// \brief Try to find a partition of the aggregate type passed in for a given
  /// offset and size.
  ///
@@ -3065,7 +3095,7 @@ private:
  static Type *getTypePartition(const DataLayout &TD, Type *Ty,
                                uint64_t Offset, uint64_t Size) {
    if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
-    return Ty;
+    return stripAggregateTypeWrapping(TD, Ty);
  
    if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
      // We can't partition pointers...
@@ -3094,7 +3124,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
      assert(Offset == 0);
  
      if (Size == ElementSize)
-      return ElementTy;
+      return stripAggregateTypeWrapping(TD, ElementTy);
      assert(Size > ElementSize);
      uint64_t NumElements = Size / ElementSize;
      if (NumElements * ElementSize != Size)
@@ -3130,7 +3160,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
    assert(Offset == 0);
  
    if (Size == ElementSize)
-    return ElementTy;
+    return stripAggregateTypeWrapping(TD, ElementTy);
  
    StructType::element_iterator EI = STy->element_begin() + Index,
                                 EE = STy->element_end();
diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll

index 945ad91..ad5fb6c 100644 (file)
--- a/llvm/test/Transforms/SROA/alignment.ll
+++ b/llvm/test/Transforms/SROA/alignment.ll
@@ -84,37 +84,6 @@ entry:
    ret void
  }
  
-%struct.S = type { i8, { i64 } }
-
-define void @test4() {
-; This test case triggered very strange alignment behavior with memcpy due to
-; strange splitting. Reported by Duncan.
-; CHECK: @test4
-
-entry:
-  %D.2113 = alloca %struct.S
-  %Op = alloca %struct.S
-  %D.2114 = alloca %struct.S
-  %gep1 = getelementptr inbounds %struct.S* %Op, i32 0, i32 0
-  store i8 0, i8* %gep1, align 8
-  %gep2 = getelementptr inbounds %struct.S* %Op, i32 0, i32 1, i32 0
-  %cast = bitcast i64* %gep2 to double*
-  store double 0.000000e+00, double* %cast, align 8
-  store i64 0, i64* %gep2, align 8
-  %dst1 = bitcast %struct.S* %D.2114 to i8*
-  %src1 = bitcast %struct.S* %Op to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst1, i8* %src1, i32 16, i32 8, i1 false)
-  %dst2 = bitcast %struct.S* %D.2113 to i8*
-  %src2 = bitcast %struct.S* %D.2114 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %src2, i32 16, i32 8, i1 false)
-; We get 3 memcpy calls with various reasons to shrink their alignment to 1.
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 3, i32 1, i1 false)
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 8, i32 1, i1 false)
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 11, i32 1, i1 false)
-
-  ret void
-}
-
  define void @test5() {
  ; Test that we preserve underaligned loads and stores when splitting.
  ; CHECK: @test5
diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll

index e7767ef..7d8e5cd 100644 (file)
--- a/llvm/test/Transforms/SROA/basictest.ll
+++ b/llvm/test/Transforms/SROA/basictest.ll
@@ -968,3 +968,47 @@ entry:
    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i32 0, i1 false)
    ret void
  }
+
+define i32 @test22(i32 %x) {
+; Test that SROA and promotion is not confused by a grab bag mixture of pointer
+; types involving wrapper aggregates and zero-length aggregate members.
+; CHECK: @test22
+
+entry:
+  %a1 = alloca { { [1 x { i32 }] } }
+  %a2 = alloca { {}, { float }, [0 x i8] }
+  %a3 = alloca { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }
+; CHECK-NOT: alloca
+
+  %wrap1 = insertvalue [1 x { i32 }] undef, i32 %x, 0, 0
+  %gep1 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0, i32 0
+  store [1 x { i32 }] %wrap1, [1 x { i32 }]* %gep1
+
+  %gep2 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0
+  %ptrcast1 = bitcast { [1 x { i32 }] }* %gep2 to { [1 x { float }] }*
+  %load1 = load { [1 x { float }] }* %ptrcast1
+  %unwrap1 = extractvalue { [1 x { float }] } %load1, 0, 0
+
+  %wrap2 = insertvalue { {}, { float }, [0 x i8] } undef, { float } %unwrap1, 1
+  store { {}, { float }, [0 x i8] } %wrap2, { {}, { float }, [0 x i8] }* %a2
+
+  %gep3 = getelementptr { {}, { float }, [0 x i8] }* %a2, i32 0, i32 1, i32 0
+  %ptrcast2 = bitcast float* %gep3 to <4 x i8>*
+  %load3 = load <4 x i8>* %ptrcast2
+  %valcast1 = bitcast <4 x i8> %load3 to i32
+
+  %wrap3 = insertvalue [1 x [1 x i32]] undef, i32 %valcast1, 0, 0
+  %wrap4 = insertvalue { [1 x [1 x i32]], {} } undef, [1 x [1 x i32]] %wrap3, 0
+  %gep4 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1
+  %ptrcast3 = bitcast { [0 x double], [1 x [1 x <4 x i8>]], {} }* %gep4 to { [1 x [1 x i32]], {} }*
+  store { [1 x [1 x i32]], {} } %wrap4, { [1 x [1 x i32]], {} }* %ptrcast3
+
+  %gep5 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1, i32 1, i32 0
+  %ptrcast4 = bitcast [1 x <4 x i8>]* %gep5 to { {}, float, {} }*
+  %load4 = load { {}, float, {} }* %ptrcast4
+  %unwrap2 = extractvalue { {}, float, {} } %load4, 1
+  %valcast2 = bitcast float %unwrap2 to i32
+
+  ret i32 %valcast2
+; CHECK: ret i32
+}
author	Chandler Carruth <chandlerc@gmail.com>
	Sat, 13 Oct 2012 10:49:33 +0000 (10:49 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Sat, 13 Oct 2012 10:49:33 +0000 (10:49 +0000)
llvm/lib/Transforms/Scalar/SROA.cpp		patch \| blob \| history
llvm/test/Transforms/SROA/alignment.ll		patch \| blob \| history
llvm/test/Transforms/SROA/basictest.ll		patch \| blob \| history