From: Ulrich Weigand
Date: Thu, 10 Jul 2014 17:20:07 +0000 (+0000)
Subject: [PowerPC] ABI support for aligned by-value aggregates
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=581badce4be5243cd9af31277a99e80f9f0180e6;p=platform%2Fupstream%2Fllvm.git

[PowerPC] ABI support for aligned by-value aggregates

This patch adds support for respecting the ABI and type alignment of
aggregates passed by value.  Currently, all aggregates are aligned at
8 bytes in the parameter save area.  This is incorrect for two reasons:

- Aggregates that need alignment of 16 bytes or more should be aligned
  at 16 bytes in the parameter save area.  This is implemented by using
  an appropriate "byval align" attribute in the IR.

- Aggregates that need alignment beyond 16 bytes need to be dynamically
  realigned by the callee.  This is implemented by setting the Realign
  flag of the ABIArgInfo::getIndirect call.

In addition, when expanding a va_arg call accessing a type that is
aligned at 16 bytes in the argument save area (either one of the
aggregate types as above, or a vector type which is already aligned
at 16 bytes), code needs to align the va_list pointer accordingly.

Reviewed by Hal Finkel.

llvm-svn: 212743
---

diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index f4a1a5f..10050ac 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -2903,6 +2903,7 @@ public:
   PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
 
   bool isPromotableTypeForABI(QualType Ty) const;
+  bool isAlignedParamType(QualType Ty) const;
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType Ty) const;
@@ -2993,6 +2994,43 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const {
   return false;
 }
 
+/// isAlignedParamType - Determine whether a type requires 16-byte
+/// alignment in the parameter area.
+bool
+PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty) const {
+  // Complex types are passed just like their elements.
+  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
+    Ty = CTy->getElementType();
+
+  // Only vector types of size 16 bytes need alignment (larger types are
+  // passed via reference, smaller types are not aligned).
+  if (Ty->isVectorType())
+    return getContext().getTypeSize(Ty) == 128;
+
+  // For single-element float/vector structs, we consider the whole type
+  // to have the same alignment requirements as its single element.
+  const Type *AlignAsType = nullptr;
+  const Type *EltType = isSingleElementStruct(Ty, getContext());
+  if (EltType) {
+    const BuiltinType *BT = EltType->getAs<BuiltinType>();
+    if ((EltType->isVectorType() &&
+         getContext().getTypeSize(EltType) == 128) ||
+        (BT && BT->isFloatingPoint()))
+      AlignAsType = EltType;
+  }
+
+  // With special case aggregates, only vector base types need alignment.
+  if (AlignAsType)
+    return AlignAsType->isVectorType();
+
+  // Otherwise, we only need alignment for any aggregate type that
+  // has an alignment requirement of >= 16 bytes.
+  if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128)
+    return true;
+
+  return false;
+}
+
 ABIArgInfo
 PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
   if (Ty->isAnyComplexType())
@@ -3014,7 +3052,10 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
     if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
       return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
 
-    return ABIArgInfo::getIndirect(0);
+    uint64_t ABIAlign = isAlignedParamType(Ty)? 16 : 8;
+    uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
+    return ABIArgInfo::getIndirect(ABIAlign, /*ByVal=*/true,
+                                   /*Realign=*/TyAlign > ABIAlign);
   }
 
   return (isPromotableTypeForABI(Ty) ?
@@ -3059,6 +3100,14 @@ llvm::Value *PPC64_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr,
   llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
   llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
 
+  // Handle types that require 16-byte alignment in the parameter save area.
+  if (isAlignedParamType(Ty)) {
+    llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
+    AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt64(15));
+    AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt64(-16));
+    Addr = Builder.CreateIntToPtr(AddrAsInt, BP, "ap.align");
+  }
+
   // Update the va_list pointer.  The pointer should be bumped by the
   // size of the object.  We can trust getTypeSize() except for a complex
   // type whose base type is smaller than a doubleword.  For these, the
diff --git a/clang/test/CodeGen/ppc64-align-struct.c b/clang/test/CodeGen/ppc64-align-struct.c
new file mode 100644
index 0000000..272809f
--- /dev/null
+++ b/clang/test/CodeGen/ppc64-align-struct.c
@@ -0,0 +1,136 @@
+// RUN: %clang_cc1 -faltivec -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+#include <stdarg.h>
+
+struct test1 { int x; int y; };
+struct test2 { int x; int y; } __attribute__((aligned (16)));
+struct test3 { int x; int y; } __attribute__((aligned (32)));
+struct test4 { int x; int y; int z; };
+
+// CHECK: define void @test1(i32 signext %x, %struct.test1* byval align 8 %y)
+void test1 (int x, struct test1 y)
+{
+}
+
+// CHECK: define void @test2(i32 signext %x, %struct.test2* byval align 16 %y)
+void test2 (int x, struct test2 y)
+{
+}
+
+// This case requires run-time realignment of the incoming struct
+// CHECK: define void @test3(i32 signext %x, %struct.test3* byval align 16)
+// CHECK: %y = alloca %struct.test3, align 32
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+void test3 (int x, struct test3 y)
+{
+}
+
+// CHECK: define void @test4(i32 signext %x, %struct.test4* byval align 8 %y)
+void test4 (int x, struct test4 y)
+{
+}
+
+// CHECK: define void @test1va(%struct.test1* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %ap.cur = load i8** %ap
+// CHECK: %ap.next = getelementptr i8* %ap.cur, i64 8
+// CHECK: store i8* %ap.next, i8** %ap
+// CHECK: bitcast i8* %ap.cur to %struct.test1*
+struct test1 test1va (int x, ...)
+{
+  struct test1 y;
+  va_list ap;
+  va_start(ap, x);
+  y = va_arg (ap, struct test1);
+  va_end(ap);
+  return y;
+}
+
+// CHECK: define void @test2va(%struct.test2* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %ap.cur = load i8** %ap
+// CHECK: %[[TMP0:[0-9]+]] = ptrtoint i8* %ap.cur to i64
+// CHECK: %[[TMP1:[0-9]+]] = add i64 %[[TMP0]], 15
+// CHECK: %[[TMP2:[0-9]+]] = and i64 %[[TMP1]], -16
+// CHECK: %ap.align = inttoptr i64 %[[TMP2]] to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i64 16
+// CHECK: store i8* %ap.next, i8** %ap
+// CHECK: bitcast i8* %ap.align to %struct.test2*
+struct test2 test2va (int x, ...)
+{
+  struct test2 y;
+  va_list ap;
+  va_start(ap, x);
+  y = va_arg (ap, struct test2);
+  va_end(ap);
+  return y;
+}
+
+// CHECK: define void @test3va(%struct.test3* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %ap.cur = load i8** %ap
+// CHECK: %[[TMP0:[0-9]+]] = ptrtoint i8* %ap.cur to i64
+// CHECK: %[[TMP1:[0-9]+]] = add i64 %[[TMP0]], 15
+// CHECK: %[[TMP2:[0-9]+]] = and i64 %[[TMP1]], -16
+// CHECK: %ap.align = inttoptr i64 %[[TMP2]] to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i64 32
+// CHECK: store i8* %ap.next, i8** %ap
+// CHECK: bitcast i8* %ap.align to %struct.test3*
+struct test3 test3va (int x, ...)
+{
+  struct test3 y;
+  va_list ap;
+  va_start(ap, x);
+  y = va_arg (ap, struct test3);
+  va_end(ap);
+  return y;
+}
+
+// CHECK: define void @test4va(%struct.test4* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %ap.cur = load i8** %ap
+// CHECK: %ap.next = getelementptr i8* %ap.cur, i64 16
+// CHECK: store i8* %ap.next, i8** %ap
+// CHECK: bitcast i8* %ap.cur to %struct.test4*
+struct test4 test4va (int x, ...)
+{
+  struct test4 y;
+  va_list ap;
+  va_start(ap, x);
+  y = va_arg (ap, struct test4);
+  va_end(ap);
+  return y;
+}
+
+// CHECK: define void @testva_longdouble(%struct.test_longdouble* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %ap.cur = load i8** %ap
+// CHECK: %ap.next = getelementptr i8* %ap.cur, i64 16
+// CHECK: store i8* %ap.next, i8** %ap
+// CHECK: bitcast i8* %ap.cur to %struct.test_longdouble*
+struct test_longdouble { long double x; };
+struct test_longdouble testva_longdouble (int x, ...)
+{
+  struct test_longdouble y;
+  va_list ap;
+  va_start(ap, x);
+  y = va_arg (ap, struct test_longdouble);
+  va_end(ap);
+  return y;
+}
+
+// CHECK: define void @testva_vector(%struct.test_vector* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %ap.cur = load i8** %ap
+// CHECK: %[[TMP0:[0-9]+]] = ptrtoint i8* %ap.cur to i64
+// CHECK: %[[TMP1:[0-9]+]] = add i64 %[[TMP0]], 15
+// CHECK: %[[TMP2:[0-9]+]] = and i64 %[[TMP1]], -16
+// CHECK: %ap.align = inttoptr i64 %[[TMP2]] to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i64 16
+// CHECK: store i8* %ap.next, i8** %ap
+// CHECK: bitcast i8* %ap.align to %struct.test_vector*
+struct test_vector { vector int x; };
+struct test_vector testva_vector (int x, ...)
+{
+  struct test_vector y;
+  va_list ap;
+  va_start(ap, x);
+  y = va_arg (ap, struct test_vector);
+  va_end(ap);
+  return y;
+}
+
diff --git a/clang/test/CodeGen/ppc64-vector.c b/clang/test/CodeGen/ppc64-vector.c
index e70544c..3ff07a4 100644
--- a/clang/test/CodeGen/ppc64-vector.c
+++ b/clang/test/CodeGen/ppc64-vector.c
@@ -45,7 +45,7 @@ v16i16 test_v16i16(v16i16 x)
   return x;
 }
 
-// CHECK: define void @test_struct_v16i16(%struct.v16i16* noalias sret %agg.result, %struct.v16i16* byval %x)
+// CHECK: define void @test_struct_v16i16(%struct.v16i16* noalias sret %agg.result, %struct.v16i16* byval align 16)
 struct v16i16 test_struct_v16i16(struct v16i16 x)
 {
   return x;
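
For reference: the ptrtoint/add/and/inttoptr sequence matched by the CHECK lines
above is the usual round-up-to-alignment idiom.  The following is a minimal C
sketch of the same computation, not part of the patch; the helper name
align_up_16 is purely illustrative.

#include <stdint.h>

/* Round a parameter-save-area pointer up to the next 16-byte boundary.
   This mirrors the (addr + 15) & -16 sequence the patch emits before a
   va_arg access to a 16-byte-aligned type. */
static void *align_up_16(void *ap)
{
  uintptr_t addr = (uintptr_t) ap;
  addr = (addr + 15) & ~(uintptr_t) 15;  /* same bit pattern as AND with -16 */
  return (void *) addr;
}

For example, an ap value ending in 0x28 is rounded up to 0x30, which is the
ap.align pointer checked in test2va, test3va, and testva_vector above.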