From e9f6a717dd9003b597defee48567431a7db7f9f4 Mon Sep 17 00:00:00 2001
From: Reid Kleckner
Date: Fri, 31 Oct 2014 17:10:41 +0000
Subject: [PATCH] Fix ARM HVA classification of classes with non-virtual bases

Reuse the PPC64 HVA detection algorithm for ARM and AArch64. This is a
nice code deduplication, since the two implementations are roughly
identical. A few virtual method extension points are needed to
understand how big an HVA can be and what element types it can have
for a given architecture.

Also make the record expansion code work in the presence of
non-virtual bases.

Reviewed By: uweigand, asl

Differential Revision: http://reviews.llvm.org/D6045

llvm-svn: 220972
---
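Note: as a quick illustration (this mirrors the D2 and D5 cases in the
new test below; it adds nothing beyond what the patch itself tests), a
class whose non-virtual bases and fields all reduce to the same machine
type now classifies as a homogeneous aggregate:

    struct Base2 { double x; };
    struct D2 : Base2 { double y, z; };  // HVA of three doubles

    struct I1 : Base2 {};
    struct I2 : Base2 {};
    struct I3 : Base2 {};
    struct D5 : I1, I2, I3 {};           // still an HVA; record expansion
                                         // now walks non-virtual bases too

Dynamic classes (those with a vptr) are never expanded this way; see the
isDynamicClass() assert added to getTypeExpansion().
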
 clang/lib/CodeGen/ABIInfo.h                      |   9 +
 clang/lib/CodeGen/CGCall.cpp                     |  51 ++++-
 clang/lib/CodeGen/TargetInfo.cpp                 | 250 ++++++++++-------------
 clang/test/CodeGen/ppc64le-aggregates-cpp.cpp    |  39 ----
 clang/test/CodeGen/ppc64le-aggregates.c          |   6 +-
 clang/test/CodeGenCXX/homogeneous-aggregates.cpp |  94 +++++++++
 6 files changed, 261 insertions(+), 188 deletions(-)
 delete mode 100644 clang/test/CodeGen/ppc64le-aggregates-cpp.cpp
 create mode 100644 clang/test/CodeGenCXX/homogeneous-aggregates.cpp

diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index 9ca5766..2976b60 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -73,6 +73,15 @@ namespace clang {
     // abstract this out.
     virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                                    CodeGen::CodeGenFunction &CGF) const = 0;
+
+    virtual bool isHomogeneousAggregateBaseType(QualType Ty) const;
+
+    virtual bool isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                   uint64_t Members) const;
+
+    bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
+                                uint64_t &Members) const;
+
   };
 }  // end namespace clang
 
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 085d702..ffa28c1 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -549,10 +549,13 @@ struct ConstantArrayExpansion : TypeExpansion {
 };
 
 struct RecordExpansion : TypeExpansion {
+  SmallVector<const CXXBaseSpecifier *, 1> Bases;
+
   SmallVector<const FieldDecl *, 1> Fields;
 
-  RecordExpansion(SmallVector<const FieldDecl *, 1> &&Fields)
-      : TypeExpansion(TEK_Record), Fields(Fields) {}
+  RecordExpansion(SmallVector<const CXXBaseSpecifier *, 1> &&Bases,
+                  SmallVector<const FieldDecl *, 1> &&Fields)
+      : TypeExpansion(TEK_Record), Bases(Bases), Fields(Fields) {}
   static bool classof(const TypeExpansion *TE) {
     return TE->Kind == TEK_Record;
   }
@@ -582,6 +585,7 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) {
         AT->getElementType(), AT->getSize().getZExtValue());
   }
   if (const RecordType *RT = Ty->getAs<RecordType>()) {
+    SmallVector<const CXXBaseSpecifier *, 1> Bases;
     SmallVector<const FieldDecl *, 1> Fields;
     const RecordDecl *RD = RT->getDecl();
     assert(!RD->hasFlexibleArrayMember() &&
@@ -604,13 +608,21 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) {
       if (LargestFD)
         Fields.push_back(LargestFD);
     } else {
+      if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+        assert(!CXXRD->isDynamicClass() &&
+               "cannot expand vtable pointers in dynamic classes");
+        for (const CXXBaseSpecifier &BS : CXXRD->bases())
+          Bases.push_back(&BS);
+      }
+
       for (const auto *FD : RD->fields()) {
         assert(!FD->isBitField() &&
                "Cannot expand structure with bit-field members.");
         Fields.push_back(FD);
       }
     }
-    return llvm::make_unique<RecordExpansion>(std::move(Fields));
+    return llvm::make_unique<RecordExpansion>(std::move(Bases),
+                                              std::move(Fields));
   }
   if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
     return llvm::make_unique<ComplexExpansion>(CT->getElementType());
@@ -625,6 +637,8 @@ static int getExpansionSize(QualType Ty, const ASTContext &Context) {
   }
   if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
     int Res = 0;
+    for (auto BS : RExp->Bases)
+      Res += getExpansionSize(BS->getType(), Context);
     for (auto FD : RExp->Fields)
       Res += getExpansionSize(FD->getType(), Context);
     return Res;
@@ -644,9 +658,10 @@ CodeGenTypes::getExpandedTypes(QualType Ty,
       getExpandedTypes(CAExp->EltTy, TI);
     }
   } else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
-    for (auto FD : RExp->Fields) {
+    for (auto BS : RExp->Bases)
+      getExpandedTypes(BS->getType(), TI);
+    for (auto FD : RExp->Fields)
       getExpandedTypes(FD->getType(), TI);
-    }
   } else if (auto CExp = dyn_cast<ComplexExpansion>(Exp.get())) {
     llvm::Type *EltTy = ConvertType(CExp->EltTy);
     *TI++ = EltTy;
@@ -670,6 +685,17 @@ void CodeGenFunction::ExpandTypeFromArgs(
       ExpandTypeFromArgs(CAExp->EltTy, LV, AI);
     }
   } else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
+    llvm::Value *This = LV.getAddress();
+    for (const CXXBaseSpecifier *BS : RExp->Bases) {
+      // Perform a single step derived-to-base conversion.
+      llvm::Value *Base =
+          GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1,
+                                /*NullCheckValue=*/false, SourceLocation());
+      LValue SubLV = MakeAddrLValue(Base, BS->getType());
+
+      // Recurse onto bases.
+      ExpandTypeFromArgs(BS->getType(), SubLV, AI);
+    }
     for (auto FD : RExp->Fields) {
       // FIXME: What are the right qualifiers here?
       LValue SubLV = EmitLValueForField(LV, FD);
@@ -701,7 +727,20 @@ void CodeGenFunction::ExpandTypeToArgs(
       ExpandTypeToArgs(CAExp->EltTy, EltRV, IRFuncTy, IRCallArgs, IRCallArgPos);
     }
   } else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
-    LValue LV = MakeAddrLValue(RV.getAggregateAddr(), Ty);
+    llvm::Value *This = RV.getAggregateAddr();
+    for (const CXXBaseSpecifier *BS : RExp->Bases) {
+      // Perform a single step derived-to-base conversion.
+      llvm::Value *Base =
+          GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1,
+                                /*NullCheckValue=*/false, SourceLocation());
+      RValue BaseRV = RValue::getAggregate(Base);
+
+      // Recurse onto bases.
+      ExpandTypeToArgs(BS->getType(), BaseRV, IRFuncTy, IRCallArgs,
+                       IRCallArgPos);
+    }
+
+    LValue LV = MakeAddrLValue(This, Ty);
     for (auto FD : RExp->Fields) {
       RValue FldRV = EmitRValueForField(LV, FD, SourceLocation());
       ExpandTypeToArgs(FD->getType(), FldRV, IRFuncTy, IRCallArgs,
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 44bc36f..ed9e83f 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -85,6 +85,15 @@ const TargetInfo &ABIInfo::getTarget() const {
   return CGT.getTarget();
 }
 
+bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  return false;
+}
+
+bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                uint64_t Members) const {
+  return false;
+}
+
 void ABIArgInfo::dump() const {
   raw_ostream &OS = llvm::errs();
   OS << "(ABIArgInfo Kind=";
@@ -3044,12 +3053,14 @@ public:
 
   bool isPromotableTypeForABI(QualType Ty) const;
   bool isAlignedParamType(QualType Ty) const;
-  bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
-                              uint64_t &Members) const;
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType Ty) const;
 
+  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+                                         uint64_t Members) const override;
+
   // TODO: We can add more logic to computeInfo to improve performance.
   // Example: For aggregate arguments that fit in a register, we could
   // use getDirectInReg (as is done below for structs containing a single
@@ -3192,9 +3203,8 @@ PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty) const {
 /// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
 /// aggregate. Base is set to the base element type, and Members is set
 /// to the number of base elements.
-bool
-PPC64_SVR4_ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
-                                           uint64_t &Members) const {
+bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
+                                     uint64_t &Members) const {
   if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
     uint64_t NElements = AT->getSize().getZExtValue();
     if (NElements == 0)
@@ -3263,19 +3273,9 @@ PPC64_SVR4_ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
     Ty = CT->getElementType();
   }
 
-  // Homogeneous aggregates for ELFv2 must have base types of float,
-  // double, long double, or 128-bit vectors.
-  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-    if (BT->getKind() != BuiltinType::Float &&
-        BT->getKind() != BuiltinType::Double &&
-        BT->getKind() != BuiltinType::LongDouble)
-      return false;
-  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
-    if (getContext().getTypeSize(VT) != 128)
-      return false;
-  } else {
+  // Most ABIs only support float, double, and some vector type widths.
+  if (!isHomogeneousAggregateBaseType(Ty))
     return false;
-  }
 
   // The base type must be the same for all members.  Types that
   // agree in both total size and mode (float vs. vector) are
@@ -3288,14 +3288,34 @@ PPC64_SVR4_ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
         getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
       return false;
   }
+  return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members);
+}
+
+bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  // Homogeneous aggregates for ELFv2 must have base types of float,
+  // double, long double, or 128-bit vectors.
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    if (BT->getKind() == BuiltinType::Float ||
+        BT->getKind() == BuiltinType::Double ||
+        BT->getKind() == BuiltinType::LongDouble)
+      return true;
+  }
+  if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    if (getContext().getTypeSize(VT) == 128)
+      return true;
+  }
+  return false;
+}
 
+bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough(
+    const Type *Base, uint64_t Members) const {
   // Vector types require one register, floating point types require one
   // or two registers depending on their size.
-  uint32_t NumRegs = Base->isVectorType() ? 1 :
-                       (getContext().getTypeSize(Base) + 63) / 64;
+  uint32_t NumRegs =
+      Base->isVectorType() ? 1 : (getContext().getTypeSize(Base) + 63) / 64;
 
   // Homogeneous Aggregates may occupy at most 8 registers.
-  return (Members > 0 && Members * NumRegs <= 8);
+  return Members * NumRegs <= 8;
 }
 
 ABIArgInfo
@@ -3586,6 +3606,10 @@ private:
   ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
                                   bool &IsHA, unsigned &AllocatedGPR,
                                   bool &IsSmallAggr, bool IsNamedArg) const;
+  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+                                         uint64_t Members) const override;
+
   bool isIllegalVectorType(QualType Ty) const;
 
   virtual void computeInfo(CGFunctionInfo &FI) const {
@@ -3681,11 +3705,6 @@ public:
 };
 }
 
-static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base,
-                                      ASTContext &Context,
-                                      bool isAArch64,
-                                      uint64_t *HAMembers = nullptr);
-
 ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
                                                 unsigned &AllocatedVFP,
                                                 bool &IsHA,
@@ -3765,7 +3784,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
   // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
   const Type *Base = nullptr;
   uint64_t Members = 0;
-  if (isARMHomogeneousAggregate(Ty, Base, getContext(), true, &Members)) {
+  if (isHomogeneousAggregate(Ty, Base, Members)) {
     IsHA = true;
     if (!IsNamedArg && isDarwinPCS()) {
       // With the Darwin ABI, variadic arguments are always passed on the stack
@@ -3823,7 +3842,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
     return ABIArgInfo::getIgnore();
 
   const Type *Base = nullptr;
-  if (isARMHomogeneousAggregate(RetTy, Base, getContext(), true))
+  uint64_t Members = 0;
+  if (isHomogeneousAggregate(RetTy, Base, Members))
     // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
     return ABIArgInfo::getDirect();
 
@@ -3851,9 +3871,35 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
   return false;
 }
 
-static llvm::Value *EmitAArch64VAArg(llvm::Value *VAListAddr, QualType Ty,
-                                     int AllocatedGPR, int AllocatedVFP,
-                                     bool IsIndirect, CodeGenFunction &CGF) {
+bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  // Homogeneous aggregates for AAPCS64 must have base types of a floating
+  // point type or a short-vector type. This is the same as the 32-bit ABI,
+  // but with the difference that any floating-point type is allowed,
+  // including __fp16.
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    if (BT->isFloatingPoint())
+      return true;
+  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    unsigned VecSize = getContext().getTypeSize(VT);
+    if (VecSize == 64 || VecSize == 128)
+      return true;
+  }
+  return false;
+}
+
+bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                       uint64_t Members) const {
+  return Members <= 4;
+}
+
+llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
+                                            CodeGenFunction &CGF) const {
+  unsigned AllocatedGPR = 0, AllocatedVFP = 0;
+  bool IsHA = false, IsSmallAggr = false;
+  ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
+                                       IsSmallAggr, false /*IsNamedArg*/);
+  bool IsIndirect = AI.isIndirect();
+
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
   //
@@ -3959,8 +4005,8 @@ static llvm::Value *EmitAArch64VAArg(llvm::Value *VAListAddr, QualType Ty,
   }
   const Type *Base = nullptr;
-  uint64_t NumMembers;
-  bool IsHFA = isARMHomogeneousAggregate(Ty, Base, Ctx, true, &NumMembers);
+  uint64_t NumMembers = 0;
+  bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers);
 
   if (IsHFA && NumMembers > 1) {
     // Homogeneous aggregates passed in registers will have their elements split
     // and stored 16-bytes apart regardless of size (they're notionally in qN,
@@ -4079,18 +4125,6 @@ static llvm::Value *EmitAArch64VAArg(llvm::Value *VAListAddr, QualType Ty,
   return ResAddr;
 }
 
-llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                            CodeGenFunction &CGF) const {
-
-  unsigned AllocatedGPR = 0, AllocatedVFP = 0;
-  bool IsHA = false, IsSmallAggr = false;
-  ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
-                                       IsSmallAggr, false /*IsNamedArg*/);
-
-  return EmitAArch64VAArg(VAListAddr, Ty, AllocatedGPR, AllocatedVFP,
-                          AI.isIndirect(), CGF);
-}
-
 llvm::Value *AArch64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
                                              CodeGenFunction &CGF) const {
   // We do not support va_arg for aggregates or illegal vector types.
@@ -4103,7 +4137,8 @@ llvm::Value *AArch64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType T
   uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
 
   const Type *Base = nullptr;
-  bool isHA = isARMHomogeneousAggregate(Ty, Base, getContext(), true);
+  uint64_t Members = 0;
+  bool isHA = isHomogeneousAggregate(Ty, Base, Members);
 
   bool isIndirect = false;
   // Arguments bigger than 16 bytes which aren't homogeneous aggregates should
@@ -4210,6 +4245,10 @@ private:
                                   bool &IsCPRC) const;
   bool isIllegalVectorType(QualType Ty) const;
 
+  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+                                         uint64_t Members) const override;
+
   void computeInfo(CGFunctionInfo &FI) const override;
 
   llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -4389,101 +4428,6 @@ void ARMABIInfo::setRuntimeCC() {
     RuntimeCC = abiCC;
 }
 
-/// isARMHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous
-/// aggregate. If HAMembers is non-null, the number of base elements
-/// contained in the type is returned through it; this is used for the
-/// recursive calls that check aggregate component types.
-static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base,
-                                      ASTContext &Context, bool isAArch64,
-                                      uint64_t *HAMembers) {
-  uint64_t Members = 0;
-  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
-    if (!isARMHomogeneousAggregate(AT->getElementType(), Base, Context, isAArch64, &Members))
-      return false;
-    Members *= AT->getSize().getZExtValue();
-  } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
-    const RecordDecl *RD = RT->getDecl();
-    if (RD->hasFlexibleArrayMember())
-      return false;
-
-    Members = 0;
-    for (const auto *FD : RD->fields()) {
-      uint64_t FldMembers;
-      if (!isARMHomogeneousAggregate(FD->getType(), Base, Context, isAArch64, &FldMembers))
-        return false;
-
-      Members = (RD->isUnion() ?
-                 std::max(Members, FldMembers) : Members + FldMembers);
-    }
-  } else {
-    Members = 1;
-    if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
-      Members = 2;
-      Ty = CT->getElementType();
-    }
-
-    // Homogeneous aggregates for AAPCS-VFP must have base types of float,
-    // double, or 64-bit or 128-bit vectors. "long double" has the same machine
-    // type as double, so it is also allowed as a base type.
-    // Homogeneous aggregates for AAPCS64 must have base types of a floating
-    // point type or a short-vector type. This is the same as the 32-bit ABI,
-    // but with the difference that any floating-point type is allowed,
-    // including __fp16.
-    if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-      if (isAArch64) {
-        if (!BT->isFloatingPoint())
-          return false;
-      } else {
-        if (BT->getKind() != BuiltinType::Float &&
-            BT->getKind() != BuiltinType::Double &&
-            BT->getKind() != BuiltinType::LongDouble)
-          return false;
-      }
-    } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
-      unsigned VecSize = Context.getTypeSize(VT);
-      if (VecSize != 64 && VecSize != 128)
-        return false;
-    } else {
-      return false;
-    }
-
-    // The base type must be the same for all members. Vector types of the
-    // same total size are treated as being equivalent here.
-    const Type *TyPtr = Ty.getTypePtr();
-    if (!Base)
-      Base = TyPtr;
-
-    if (Base != TyPtr) {
-      // Homogeneous aggregates are defined as containing members with the
-      // same machine type. There are two cases in which two members have
-      // different TypePtrs but the same machine type:
-
-      // 1) Vectors of the same length, regardless of the type and number
-      // of their members.
-      const bool SameLengthVectors = Base->isVectorType() && TyPtr->isVectorType()
-        && (Context.getTypeSize(Base) == Context.getTypeSize(TyPtr));
-
-      // 2) In the 32-bit AAPCS, `double' and `long double' have the same
-      // machine type. This is not the case for the 64-bit AAPCS.
-      const bool SameSizeDoubles =
-        (   (   Base->isSpecificBuiltinType(BuiltinType::Double)
-             && TyPtr->isSpecificBuiltinType(BuiltinType::LongDouble))
-         || (   Base->isSpecificBuiltinType(BuiltinType::LongDouble)
-             && TyPtr->isSpecificBuiltinType(BuiltinType::Double)))
-        && (Context.getTypeSize(Base) == Context.getTypeSize(TyPtr));
-
-      if (!SameLengthVectors && !SameSizeDoubles)
-        return false;
-    }
-  }
-
-  // Homogeneous Aggregates can have at most 4 members of the base type.
-  if (HAMembers)
-    *HAMembers = Members;
-
-  return (Members > 0 && Members <= 4);
-}
-
 /// markAllocatedVFPs - update VFPRegs according to the alignment and
 /// number of VFP registers (unit is S register) requested.
 void ARMABIInfo::markAllocatedVFPs(unsigned Alignment,
@@ -4640,7 +4584,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
     // into VFP registers.
     const Type *Base = nullptr;
     uint64_t Members = 0;
-    if (isARMHomogeneousAggregate(Ty, Base, getContext(), false, &Members)) {
+    if (isHomogeneousAggregate(Ty, Base, Members)) {
       assert(Base && "Base class should be set for homogeneous aggregate");
       // Base can be a floating-point or a vector.
       if (Base->isVectorType()) {
@@ -4845,7 +4789,8 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
   // Check for homogeneous aggregates with AAPCS-VFP.
   if (getABIKind() == AAPCS_VFP && !isVariadic) {
     const Type *Base = nullptr;
-    if (isARMHomogeneousAggregate(RetTy, Base, getContext(), false)) {
+    uint64_t Members;
+    if (isHomogeneousAggregate(RetTy, Base, Members)) {
       assert(Base && "Base class should be set for homogeneous aggregate");
       // Homogeneous Aggregates are returned directly.
       return ABIArgInfo::getDirect(nullptr, 0, nullptr, !isAAPCS_VFP);
@@ -4891,6 +4836,27 @@ bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
   return false;
 }
 
+bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  // Homogeneous aggregates for AAPCS-VFP must have base types of float,
+  // double, or 64-bit or 128-bit vectors.
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    if (BT->getKind() == BuiltinType::Float ||
+        BT->getKind() == BuiltinType::Double ||
+        BT->getKind() == BuiltinType::LongDouble)
+      return true;
+  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    unsigned VecSize = getContext().getTypeSize(VT);
+    if (VecSize == 64 || VecSize == 128)
+      return true;
+  }
+  return false;
+}
+
+bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                   uint64_t Members) const {
+  return Members <= 4;
+}
+
 llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                                    CodeGenFunction &CGF) const {
   llvm::Type *BP = CGF.Int8PtrTy;
diff --git a/clang/test/CodeGen/ppc64le-aggregates-cpp.cpp b/clang/test/CodeGen/ppc64le-aggregates-cpp.cpp
deleted file mode 100644
index 7e7dde8..0000000
--- a/clang/test/CodeGen/ppc64le-aggregates-cpp.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// REQUIRES: powerpc-registered-target
-// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
-
-// Test that C++ classes are correctly classified as homogeneous aggregates.
-
-struct Base1 {
-  int x;
-};
-struct Base2 {
-  double x;
-};
-struct Base3 {
-  double x;
-};
-struct D1 : Base1 {  // non-homogeneous aggregate
-  double y, z;
-};
-struct D2 : Base2 {  // homogeneous aggregate
-  double y, z;
-};
-struct D3 : Base1, Base2 {  // non-homogeneous aggregate
-  double y, z;
-};
-struct D4 : Base2, Base3 {  // homogeneous aggregate
-  double y, z;
-};
-
-// CHECK: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, [3 x i64] %x.coerce)
-D1 func_D1(D1 x) { return x; }
-
-// CHECK: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
-D2 func_D2(D2 x) { return x; }
-
-// CHECK: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce)
-D3 func_D3(D3 x) { return x; }
-
-// CHECK: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce)
-D4 func_D4(D4 x) { return x; }
-
diff --git a/clang/test/CodeGen/ppc64le-aggregates.c b/clang/test/CodeGen/ppc64le-aggregates.c
index acf34a8..e193dcc 100644
--- a/clang/test/CodeGen/ppc64le-aggregates.c
+++ b/clang/test/CodeGen/ppc64le-aggregates.c
@@ -1,4 +1,3 @@
-// REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -faltivec -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
 
 // Test homogeneous float aggregate passing and returning.
@@ -16,6 +15,8 @@ struct f9 { float f[9]; };
 struct fab { float a; float b; };
 struct fabc { float a; float b; float c; };
 
+struct f2a2b { float a[2]; float b[2]; };
+
 // CHECK: define [1 x float] @func_f1(float inreg %x.coerce)
 struct f1 func_f1(struct f1 x) { return x; }
 
@@ -49,6 +50,9 @@ struct fab func_fab(struct fab x) { return x; }
 // CHECK: define [3 x float] @func_fabc([3 x float] %x.coerce)
 struct fabc func_fabc(struct fabc x) { return x; }
 
+// CHECK: define [4 x float] @func_f2a2b([4 x float] %x.coerce)
+struct f2a2b func_f2a2b(struct f2a2b x) { return x; }
+
 // CHECK-LABEL: @call_f1
 // CHECK: %[[TMP:[^ ]+]] = load float* getelementptr inbounds (%struct.f1* @global_f1, i32 0, i32 0, i32 0), align 1
 // CHECK: call [1 x float] @func_f1(float inreg %[[TMP]])
diff --git a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
new file mode 100644
index 0000000..6381840
--- /dev/null
+++ b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC
+// RUN: %clang_cc1 -mfloat-abi hard -triple armv7-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM32
+// RUN: %clang_cc1 -mfloat-abi hard -triple aarch64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM64
+
+// Test that C++ classes are correctly classified as homogeneous aggregates.
+
+struct Base1 {
+  int x;
+};
+struct Base2 {
+  double x;
+};
+struct Base3 {
+  double x;
+};
+struct D1 : Base1 {  // non-homogeneous aggregate
+  double y, z;
+};
+struct D2 : Base2 {  // homogeneous aggregate
+  double y, z;
+};
+struct D3 : Base1, Base2 {  // non-homogeneous aggregate
+  double y, z;
+};
+struct D4 : Base2, Base3 {  // homogeneous aggregate
+  double y, z;
+};
+
+struct I1 : Base2 {};
+struct I2 : Base2 {};
+struct I3 : Base2 {};
+struct D5 : I1, I2, I3 {};  // homogeneous aggregate
+
+// PPC: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, [3 x i64] %x.coerce)
+// ARM32: define arm_aapcs_vfpcc void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, { [3 x i64] } %x.coerce)
+// ARM64: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, %struct.D1* %x)
+D1 func_D1(D1 x) { return x; }
+
+// PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
+// ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce)
+// ARM64: define %struct.D2 @_Z7func_D22D2(double %x.0, double %x.1, double %x.2)
+D2 func_D2(D2 x) { return x; }
+
+// PPC: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce)
+// ARM32: define arm_aapcs_vfpcc void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, { [4 x i64] } %x.coerce)
+// ARM64: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, %struct.D3* %x)
+D3 func_D3(D3 x) { return x; }
+
+// PPC: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce)
+// ARM32: define arm_aapcs_vfpcc %struct.D4 @_Z7func_D42D4(%struct.D4 %x.coerce)
+// ARM64: define %struct.D4 @_Z7func_D42D4(double %x.0, double %x.1, double %x.2, double %x.3)
+D4 func_D4(D4 x) { return x; }
+
+D5 func_D5(D5 x) { return x; }
+// PPC: define [3 x double] @_Z7func_D52D5([3 x double] %x.coerce)
+// ARM32: define arm_aapcs_vfpcc %struct.D5 @_Z7func_D52D5(%struct.D5 %x.coerce)
+
+// The C++ multiple inheritance expansion case is a little more complicated, so
+// do some extra checking.
+//
+// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5(double %x.0, double %x.1, double %x.2)
+// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
+// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
+// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
+// ARM64: store double %x.0, double*
+// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
+// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
+// ARM64: store double %x.1, double*
+// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
+// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
+// ARM64: store double %x.2, double*
+
+void call_D5(D5 *p) {
+  func_D5(*p);
+}
+
+// Check the call site.
+//
+// ARM64-LABEL: define void @_Z7call_D5P2D5(%struct.D5* %p)
+// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
+// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
+// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
+// ARM64: load double*
+// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
+// ARM64: bitcast i8* %{{.*}} to %struct.I2*
+// ARM64: bitcast %struct.I2* %{{.*}} to %struct.Base2*
+// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
+// ARM64: load double*
+// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
+// ARM64: bitcast i8* %{{.*}} to %struct.I3*
+// ARM64: bitcast %struct.I3* %{{.*}} to %struct.Base2*
+// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
+// ARM64: load double*
+// ARM64: call %struct.D5 @_Z7func_D52D5(double %{{.*}}, double %{{.*}}, double %{{.*}})
-- 
2.7.4