When generating IR for default copy-constructors, copy-assignment operators,
authorLang Hames <lhames@gmail.com>
Mon, 11 Feb 2013 23:44:11 +0000 (23:44 +0000)
committerLang Hames <lhames@gmail.com>
Mon, 11 Feb 2013 23:44:11 +0000 (23:44 +0000)
move-constructors and move-assignment operators, use memcpy to copy adjacent
POD members.

Previously, classes with one or more Non-POD members would fall back on
element-wise copies for all members, including POD members. This often
generated a lot of IR. Without padding metadata, it wasn't often possible
for the LLVM optimizers to turn the element-wise copies into a memcpy.

This code hasn't yet received any serious tuning. I didn't see any serious
regressions on a self-hosted clang build, or any of the nightly tests, but
I think it's important to get this out in the wild to get more testing.
Insights, feedback and comments welcome.

Many thanks to David Blaikie, Richard Smith, and especially John McCall for
their help and feedback on this work.

llvm-svn: 174919

clang/lib/CodeGen/CGClass.cpp
clang/lib/CodeGen/CodeGenFunction.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/test/CodeGenCXX/copy-assign-synthesis-1.cpp
clang/test/CodeGenCXX/implicit-copy-assign-operator.cpp
clang/test/CodeGenCXX/implicit-copy-constructor.cpp

index ce32acd..6bee452 100644 (file)
@@ -18,6 +18,7 @@
 #include "clang/AST/EvaluatedExprVisitor.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/StmtCXX.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Frontend/CodeGenOptions.h"
 
 using namespace clang;
@@ -742,6 +743,342 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) {
     ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true);
 }
 
+namespace {
+  class FieldMemcpyizer {
+  public:
+    FieldMemcpyizer(CodeGenFunction &CGF, const CXXRecordDecl *ClassDecl,
+                    const VarDecl *SrcRec)
+      : CGF(CGF), ClassDecl(ClassDecl), SrcRec(SrcRec), 
+        RecLayout(CGF.getContext().getASTRecordLayout(ClassDecl)),
+        FirstField(0), LastField(0), FirstFieldOffset(0), LastFieldOffset(0),
+        LastAddedFieldIndex(0) { }
+
+    static bool isMemcpyableField(FieldDecl *F) {
+      Qualifiers Qual = F->getType().getQualifiers();
+      if (Qual.hasVolatile() || Qual.hasObjCLifetime())
+        return false;
+      return true;
+    }
+
+    void addMemcpyableField(FieldDecl *F) {
+      if (FirstField == 0)
+        addInitialField(F);
+      else
+        addNextField(F);
+    }
+
+    CharUnits getMemcpySize() const {
+      unsigned LastFieldSize =
+        CGF.getContext().getTypeInfo(LastField->getType()).first; 
+      uint64_t MemcpySizeBits =
+        LastFieldOffset + LastFieldSize - FirstFieldOffset +
+        CGF.getContext().getCharWidth() - 1;
+      CharUnits MemcpySize =
+        CGF.getContext().toCharUnitsFromBits(MemcpySizeBits);
+      return MemcpySize;
+    }
+
+    void emitMemcpy() {
+      // Give the subclass a chance to bail out if it feels the memcpy isn't
+      // worth it (e.g. Hasn't aggregated enough data).
+      if (FirstField == 0) {
+        return;
+      }
+
+      unsigned FirstFieldAlign =
+        CGF.getContext().getTypeInfo(FirstField->getType()).second;
+      assert(FirstFieldOffset % FirstFieldAlign == 0 && "Bad field alignment.");
+      CharUnits Alignment =
+        CGF.getContext().toCharUnitsFromBits(FirstFieldAlign);
+      CharUnits MemcpySize = getMemcpySize();
+      QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl);
+      llvm::Value *ThisPtr = CGF.LoadCXXThis();
+      LValue DestLV = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
+      LValue Dest = CGF.EmitLValueForFieldInitialization(DestLV, FirstField);
+      llvm::Value *SrcPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(SrcRec));
+      LValue SrcLV = CGF.MakeNaturalAlignAddrLValue(SrcPtr, RecordTy);
+      LValue Src = CGF.EmitLValueForFieldInitialization(SrcLV, FirstField);
+
+      emitMemcpyIR(Dest.isBitField() ? Dest.getBitFieldAddr() : Dest.getAddress(),
+                   Src.isBitField() ? Src.getBitFieldAddr() : Src.getAddress(),
+                   MemcpySize, Alignment);
+      reset();
+    }
+
+    void reset() {
+      FirstField = 0;
+    }
+
+  protected:
+    CodeGenFunction &CGF;
+    const CXXRecordDecl *ClassDecl;
+
+  private:
+
+    void emitMemcpyIR(llvm::Value *DestPtr, llvm::Value *SrcPtr,
+                      CharUnits Size, CharUnits Alignment) {
+      llvm::PointerType *DPT = cast<llvm::PointerType>(DestPtr->getType());
+      llvm::Type *DBP =
+        llvm::Type::getInt8PtrTy(CGF.getLLVMContext(), DPT->getAddressSpace());
+      DestPtr = CGF.Builder.CreateBitCast(DestPtr, DBP);
+
+      llvm::PointerType *SPT = cast<llvm::PointerType>(SrcPtr->getType());
+      llvm::Type *SBP =
+        llvm::Type::getInt8PtrTy(CGF.getLLVMContext(), SPT->getAddressSpace());
+      SrcPtr = CGF.Builder.CreateBitCast(SrcPtr, SBP);
+
+      CGF.Builder.CreateMemCpy(DestPtr, SrcPtr, Size.getQuantity(),
+                               Alignment.getQuantity());
+    }
+
+    void addInitialField(FieldDecl *F) {
+        FirstField = F;
+        LastField = F;
+        FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex());
+        LastFieldOffset = FirstFieldOffset;
+        LastAddedFieldIndex = F->getFieldIndex();
+        return;
+      }
+
+    void addNextField(FieldDecl *F) {
+      assert(F->getFieldIndex() == LastAddedFieldIndex + 1 &&
+             "Cannot aggregate non-contiguous fields.");
+      LastAddedFieldIndex = F->getFieldIndex();
+
+      // The 'first' and 'last' fields are chosen by offset, rather than field
+      // index. This allows the code to support bitfields, as well as regular
+      // fields.
+      uint64_t FOffset = RecLayout.getFieldOffset(F->getFieldIndex());
+      if (FOffset < FirstFieldOffset) {
+        FirstField = F;
+        FirstFieldOffset = FOffset;
+      } else if (FOffset > LastFieldOffset) {
+        LastField = F;
+        LastFieldOffset = FOffset;
+      }
+    }
+
+    const VarDecl *SrcRec;
+    const ASTRecordLayout &RecLayout;
+    FieldDecl *FirstField;
+    FieldDecl *LastField;
+    uint64_t FirstFieldOffset, LastFieldOffset;
+    unsigned LastAddedFieldIndex;
+  };
+
+  class ConstructorMemcpyizer : public FieldMemcpyizer {
+  private:
+
+    /// Get source argument for copy constructor. Returns null if not a copy
+    /// constructor. 
+    static const VarDecl* getTrivialCopySource(const CXXConstructorDecl *CD,
+                                               FunctionArgList &Args) {
+      if (CD->isCopyOrMoveConstructor() && CD->isImplicitlyDefined())
+        return Args[Args.size() - 1];
+      return 0; 
+    }
+
+    // Returns true if a CXXCtorInitializer represents a member initialization
+    // that can be rolled into a memcpy.
+    bool isMemberInitMemcpyable(CXXCtorInitializer *MemberInit) const {
+      if (!MemcpyableCtor)
+        return false;
+      FieldDecl *Field = MemberInit->getMember();
+      assert(Field != 0 && "No field for member init.");
+      QualType FieldType = Field->getType();
+      CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(MemberInit->getInit());
+
+      // Bail out on non-POD, not-trivially-constructable members.
+      if (!(CE && CE->getConstructor()->isTrivial()) &&
+          !(FieldType.isTriviallyCopyableType(CGF.getContext()) ||
+            FieldType->isReferenceType()))
+        return false;
+
+      // Bail out on volatile fields.
+      if (!isMemcpyableField(Field))
+        return false;
+
+      // Otherwise we're good.
+      return true;
+    }
+
+  public:
+    ConstructorMemcpyizer(CodeGenFunction &CGF, const CXXConstructorDecl *CD,
+                          FunctionArgList &Args)
+      : FieldMemcpyizer(CGF, CD->getParent(), getTrivialCopySource(CD, Args)),
+        ConstructorDecl(CD),
+        MemcpyableCtor(CD->isImplicitlyDefined() &&
+                       CD->isCopyOrMoveConstructor() &&
+                       CGF.getLangOpts().getGC() == LangOptions::NonGC),
+        Args(Args) { }
+
+    void addMemberInitializer(CXXCtorInitializer *MemberInit) {
+      if (isMemberInitMemcpyable(MemberInit)) {
+        AggregatedInits.push_back(MemberInit);
+        addMemcpyableField(MemberInit->getMember());
+      } else {
+        emitAggregatedInits();
+        EmitMemberInitializer(CGF, ConstructorDecl->getParent(), MemberInit,
+                              ConstructorDecl, Args);
+      }
+    }
+
+    void emitAggregatedInits() {
+      if (AggregatedInits.size() <= 1) {
+        // This memcpy is too small to be worthwhile. Fall back on default
+        // codegen.
+        for (unsigned i = 0; i < AggregatedInits.size(); ++i) {
+          EmitMemberInitializer(CGF, ConstructorDecl->getParent(),
+                                AggregatedInits[i], ConstructorDecl, Args);
+        }
+        reset();
+        return;
+      }
+
+      pushEHDestructors();
+      emitMemcpy();
+      AggregatedInits.clear();
+    }
+
+    void pushEHDestructors() {
+      llvm::Value *ThisPtr = CGF.LoadCXXThis();
+      QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl);
+      LValue LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
+
+      for (unsigned i = 0; i < AggregatedInits.size(); ++i) {
+        QualType FieldType = AggregatedInits[i]->getMember()->getType();
+        QualType::DestructionKind dtorKind = FieldType.isDestructedType();
+        if (CGF.needsEHCleanup(dtorKind))
+          CGF.pushEHDestroy(dtorKind, LHS.getAddress(), FieldType);
+      }
+    }
+
+    void finish() {
+      emitAggregatedInits();
+    }
+
+  private:
+    const CXXConstructorDecl *ConstructorDecl;
+    bool MemcpyableCtor;
+    FunctionArgList &Args;
+    SmallVector<CXXCtorInitializer*, 16> AggregatedInits;
+  };
+
+  class AssignmentMemcpyizer : public FieldMemcpyizer {
+  private:
+
+    // Returns the memcpyable field copied by the given statement, if one
+    // exists. Otherwise r
+    FieldDecl* getMemcpyableField(Stmt *S) {
+      if (!AssignmentsMemcpyable)
+        return 0;
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(S)) {
+        // Recognise trivial assignments.
+        if (BO->getOpcode() != BO_Assign)
+          return 0;
+        MemberExpr *ME = dyn_cast<MemberExpr>(BO->getLHS());
+        if (!ME)
+          return 0;
+        FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
+        if (!Field || !isMemcpyableField(Field))
+          return 0;
+        Stmt *RHS = BO->getRHS();
+        if (ImplicitCastExpr *EC = dyn_cast<ImplicitCastExpr>(RHS))
+          RHS = EC->getSubExpr();
+        if (!RHS)
+          return 0;
+        MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS);
+        if (dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field)
+          return 0;
+        return Field;
+      } else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) {
+        CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl());
+        if (!(MD && (MD->isCopyAssignmentOperator() ||
+                       MD->isMoveAssignmentOperator()) &&
+              MD->isTrivial()))
+          return 0;
+        MemberExpr *IOA = dyn_cast<MemberExpr>(MCE->getImplicitObjectArgument());
+        if (!IOA)
+          return 0;
+        FieldDecl *Field = dyn_cast<FieldDecl>(IOA->getMemberDecl());
+        if (!Field || !isMemcpyableField(Field))
+          return 0;
+        MemberExpr *Arg0 = dyn_cast<MemberExpr>(MCE->getArg(0));
+        if (!Arg0 || Field != dyn_cast<FieldDecl>(Arg0->getMemberDecl()))
+          return 0;
+        return Field;
+      } else if (CallExpr *CE = dyn_cast<CallExpr>(S)) {
+        FunctionDecl *FD = dyn_cast<FunctionDecl>(CE->getCalleeDecl());
+        if (!FD || FD->getBuiltinID() != Builtin::BI__builtin_memcpy)
+          return 0;
+        Expr *DstPtr = CE->getArg(0);
+        if (ImplicitCastExpr *DC = dyn_cast<ImplicitCastExpr>(DstPtr))
+          DstPtr = DC->getSubExpr();
+        UnaryOperator *DUO = dyn_cast<UnaryOperator>(DstPtr);
+        if (!DUO || DUO->getOpcode() != UO_AddrOf)
+          return 0;
+        MemberExpr *ME = dyn_cast<MemberExpr>(DUO->getSubExpr());
+        if (!ME)
+          return 0;
+        FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
+        if (!Field || !isMemcpyableField(Field))
+          return 0;
+        Expr *SrcPtr = CE->getArg(1);
+        if (ImplicitCastExpr *SC = dyn_cast<ImplicitCastExpr>(SrcPtr))
+          SrcPtr = SC->getSubExpr();
+        UnaryOperator *SUO = dyn_cast<UnaryOperator>(SrcPtr);
+        if (!SUO || SUO->getOpcode() != UO_AddrOf)
+          return 0;
+        MemberExpr *ME2 = dyn_cast<MemberExpr>(SUO->getSubExpr());
+        if (!ME2 || Field != dyn_cast<FieldDecl>(ME2->getMemberDecl()))
+          return 0;
+        return Field;
+      }
+
+      return 0;
+    }
+
+    bool AssignmentsMemcpyable;
+    SmallVector<Stmt*, 16> AggregatedStmts;
+
+  public:
+
+    AssignmentMemcpyizer(CodeGenFunction &CGF, const CXXMethodDecl *AD,
+                         FunctionArgList &Args)
+      : FieldMemcpyizer(CGF, AD->getParent(), Args[Args.size() - 1]),
+        AssignmentsMemcpyable(CGF.getLangOpts().getGC() == LangOptions::NonGC) {
+      assert(Args.size() == 2);
+    }
+
+    void emitAssignment(Stmt *S) {
+      FieldDecl *F = getMemcpyableField(S);
+      if (F) {
+        addMemcpyableField(F);
+        AggregatedStmts.push_back(S);
+      } else {  
+        emitAggregatedStmts();
+        CGF.EmitStmt(S);
+      }
+    }
+
+    void emitAggregatedStmts() {
+      if (AggregatedStmts.size() <= 1) {
+        for (unsigned i = 0; i < AggregatedStmts.size(); ++i)
+          CGF.EmitStmt(AggregatedStmts[i]);
+        reset();
+      }
+
+      emitMemcpy();
+      AggregatedStmts.clear();
+    }
+
+    void finish() {
+      emitAggregatedStmts();
+    }
+  };
+
+}
+
 /// EmitCtorPrologue - This routine generates necessary code to initialize
 /// base classes and non-static data members belonging to this constructor.
 void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
@@ -770,8 +1107,10 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
 
   InitializeVTablePointers(ClassDecl);
 
+  ConstructorMemcpyizer CM(*this, CD, Args);
   for (unsigned I = 0, E = MemberInitializers.size(); I != E; ++I)
-    EmitMemberInitializer(*this, ClassDecl, MemberInitializers[I], CD, Args);
+    CM.addMemberInitializer(MemberInitializers[I]);
+  CM.finish();
 }
 
 static bool
@@ -940,6 +1279,24 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
     ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true);
 }
 
+void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) {
+  const CXXMethodDecl *AssignOp = cast<CXXMethodDecl>(CurGD.getDecl());
+  const Stmt *RootS = AssignOp->getBody();
+  assert(isa<CompoundStmt>(RootS) &&
+         "Body of an implicit assignment operator should be compound stmt.");
+  const CompoundStmt *RootCS = cast<CompoundStmt>(RootS);
+
+  LexicalScope Scope(*this, RootCS->getSourceRange());
+
+  AssignmentMemcpyizer AM(*this, AssignOp, Args);
+  for (CompoundStmt::const_body_iterator I = RootCS->body_begin(),
+                                         E = RootCS->body_end();
+       I != E; ++I) {
+    AM.emitAssignment(*I);  
+  }
+  AM.finish();
+}
+
 namespace {
   /// Call the operator delete associated with the current destructor.
   struct CallDtorDelete : EHScopeStack::Cleanup {
index ec139df..86c79ae 100644 (file)
@@ -559,6 +559,11 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
     // The lambda "__invoke" function is special, because it forwards or
     // clones the body of the function call operator (but is actually static).
     EmitLambdaStaticInvokeFunction(cast<CXXMethodDecl>(FD));
+  } else if (FD->isDefaulted() && isa<CXXMethodDecl>(FD) &&
+             cast<CXXMethodDecl>(FD)->isCopyAssignmentOperator()) {
+    // Implicit copy-assignment gets the same special treatment as implicit
+    // copy-constructors.
+    emitImplicitAssignmentOperatorBody(Args);
   }
   else
     EmitFunctionBody(Args);
index d43b588..fcdd31f 100644 (file)
@@ -1402,6 +1402,7 @@ public:
 
   void EmitConstructorBody(FunctionArgList &Args);
   void EmitDestructorBody(FunctionArgList &Args);
+  void emitImplicitAssignmentOperatorBody(FunctionArgList &Args);
   void EmitFunctionBody(FunctionArgList &Args);
 
   void EmitForwardingCallToLambda(const CXXRecordDecl *Lambda,
index 46d0483..5d09b54 100644 (file)
@@ -96,14 +96,8 @@ int main() {
 // CHECK-LP64: .globl   __ZN1XaSERKS_
 // CHECK-LP64: .weak_definition  __ZN1XaSERKS_
 // CHECK-LP64: __ZN1XaSERKS_:
-// CHECK-LP64: .globl   __ZN1QaSERKS_
-// CHECK-LP64: .weak_definition  __ZN1QaSERKS_
-// CHECK-LP64: __ZN1QaSERKS_:
 
 // CHECK-LP32: .globl   __ZN1XaSERKS_
 // CHECK-LP32: .weak_definition  __ZN1XaSERKS_
 // CHECK-LP32: __ZN1XaSERKS_:
-// CHECK-LP32: .globl   __ZN1QaSERKS_
-// CHECK-LP32: .weak_definition  __ZN1QaSERKS_
-// CHECK-LP32: __ZN1QaSERKS_:
 
index 0ec89fc..79586fb 100644 (file)
@@ -44,7 +44,7 @@ void test_D(D d1, D d2) {
 // CHECK: {{call.*_ZN1AaSERS_}}
 // CHECK: {{call.*_ZN1BaSERS_}}
 // CHECK: {{call.*_ZN1CaSERKS_}}
-// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 24}}
+// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 28}}
 // CHECK: {{call.*_ZN1BaSERS_}}
 // CHECK: br
 // CHECK: {{call.*_ZN1CaSERKS_}}
index 8a3a422..24e84d5 100644 (file)
@@ -46,7 +46,7 @@ void f(D d) {
 // CHECK: call void @_ZN1AD1Ev
 // CHECK: call void @_ZN1AC2ERS_
 // CHECK: call void @_ZN1BC2ERS_
-// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 24}}
+// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 28}}
 // CHECK: call void @_ZN1BC1ERS_
 // CHECK: br
 // CHECK: {{icmp ult.*, 2}}
@@ -54,8 +54,7 @@ void f(D d) {
 // CHECK: call void @_ZN1AC1Ev
 // CHECK: call void @_ZN1CC1ERS_1A
 // CHECK: call void @_ZN1AD1Ev
-// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 288}}
-// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 12}}
+// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 300}}
 // CHECK: ret void