From 5824a4f1b0962bfe7a80d382d0ed9ce2050d3d88 Mon Sep 17 00:00:00 2001
From: Lang Hames
Date: Mon, 11 Feb 2013 23:44:11 +0000
Subject: [PATCH] When generating IR for default copy-constructors,
 copy-assignment operators, move-constructors and move-assignment operators,
 use memcpy to copy adjacent POD members.

Previously, classes with one or more non-POD members would fall back on
element-wise copies for all members, including POD members. This often
generated a lot of IR. Without padding metadata, it wasn't often possible
for the LLVM optimizers to turn the element-wise copies into a memcpy.

This code hasn't yet received any serious tuning. I didn't see any serious
regressions on a self-hosted clang build, or on any of the nightly tests,
but I think it's important to get this out in the wild to get more testing.
Insights, feedback and comments welcome.

Many thanks to David Blaikie, Richard Smith, and especially John McCall for
their help and feedback on this work.

llvm-svn: 174919
---
 clang/lib/CodeGen/CGClass.cpp                      | 359 ++++++++++++++++++++-
 clang/lib/CodeGen/CodeGenFunction.cpp              |   5 +
 clang/lib/CodeGen/CodeGenFunction.h                |   1 +
 clang/test/CodeGenCXX/copy-assign-synthesis-1.cpp  |   6 -
 .../CodeGenCXX/implicit-copy-assign-operator.cpp   |   2 +-
 .../test/CodeGenCXX/implicit-copy-constructor.cpp  |   5 +-
 6 files changed, 367 insertions(+), 11 deletions(-)

diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index ce32acd..6bee452 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -18,6 +18,7 @@
 #include "clang/AST/EvaluatedExprVisitor.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/StmtCXX.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Frontend/CodeGenOptions.h"
 
 using namespace clang;
@@ -742,6 +743,342 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) {
     ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true);
 }
 
+namespace {
+  class FieldMemcpyizer {
+  public:
+    FieldMemcpyizer(CodeGenFunction &CGF, const CXXRecordDecl *ClassDecl,
+                    const VarDecl *SrcRec)
+      : CGF(CGF), ClassDecl(ClassDecl), SrcRec(SrcRec),
+        RecLayout(CGF.getContext().getASTRecordLayout(ClassDecl)),
+        FirstField(0), LastField(0), FirstFieldOffset(0), LastFieldOffset(0),
+        LastAddedFieldIndex(0) { }
+
+    static bool isMemcpyableField(FieldDecl *F) {
+      Qualifiers Qual = F->getType().getQualifiers();
+      if (Qual.hasVolatile() || Qual.hasObjCLifetime())
+        return false;
+      return true;
+    }
+
+    void addMemcpyableField(FieldDecl *F) {
+      if (FirstField == 0)
+        addInitialField(F);
+      else
+        addNextField(F);
+    }
+
+    CharUnits getMemcpySize() const {
+      unsigned LastFieldSize =
+        CGF.getContext().getTypeInfo(LastField->getType()).first;
+      uint64_t MemcpySizeBits =
+        LastFieldOffset + LastFieldSize - FirstFieldOffset +
+        CGF.getContext().getCharWidth() - 1;
+      CharUnits MemcpySize =
+        CGF.getContext().toCharUnitsFromBits(MemcpySizeBits);
+      return MemcpySize;
+    }
+
+    void emitMemcpy() {
+      // Give the subclass a chance to bail out if it feels the memcpy isn't
+      // worth it (e.g. it hasn't aggregated enough data).
+      if (FirstField == 0) {
+        return;
+      }
+
+      unsigned FirstFieldAlign =
+        CGF.getContext().getTypeInfo(FirstField->getType()).second;
+      assert(FirstFieldOffset % FirstFieldAlign == 0 && "Bad field alignment.");
+      CharUnits Alignment =
+        CGF.getContext().toCharUnitsFromBits(FirstFieldAlign);
+      CharUnits MemcpySize = getMemcpySize();
+      QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl);
+      llvm::Value *ThisPtr = CGF.LoadCXXThis();
+      LValue DestLV = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
+      LValue Dest = CGF.EmitLValueForFieldInitialization(DestLV, FirstField);
+      llvm::Value *SrcPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(SrcRec));
+      LValue SrcLV = CGF.MakeNaturalAlignAddrLValue(SrcPtr, RecordTy);
+      LValue Src = CGF.EmitLValueForFieldInitialization(SrcLV, FirstField);
+
+      emitMemcpyIR(Dest.isBitField() ? Dest.getBitFieldAddr() : Dest.getAddress(),
+                   Src.isBitField() ? Src.getBitFieldAddr() : Src.getAddress(),
+                   MemcpySize, Alignment);
+      reset();
+    }
+
+    void reset() {
+      FirstField = 0;
+    }
+
+  protected:
+    CodeGenFunction &CGF;
+    const CXXRecordDecl *ClassDecl;
+
+  private:
+
+    void emitMemcpyIR(llvm::Value *DestPtr, llvm::Value *SrcPtr,
+                      CharUnits Size, CharUnits Alignment) {
+      llvm::PointerType *DPT = cast<llvm::PointerType>(DestPtr->getType());
+      llvm::Type *DBP =
+        llvm::Type::getInt8PtrTy(CGF.getLLVMContext(), DPT->getAddressSpace());
+      DestPtr = CGF.Builder.CreateBitCast(DestPtr, DBP);
+
+      llvm::PointerType *SPT = cast<llvm::PointerType>(SrcPtr->getType());
+      llvm::Type *SBP =
+        llvm::Type::getInt8PtrTy(CGF.getLLVMContext(), SPT->getAddressSpace());
+      SrcPtr = CGF.Builder.CreateBitCast(SrcPtr, SBP);
+
+      CGF.Builder.CreateMemCpy(DestPtr, SrcPtr, Size.getQuantity(),
+                               Alignment.getQuantity());
+    }
+
+    void addInitialField(FieldDecl *F) {
+      FirstField = F;
+      LastField = F;
+      FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex());
+      LastFieldOffset = FirstFieldOffset;
+      LastAddedFieldIndex = F->getFieldIndex();
+      return;
+    }
+
+    void addNextField(FieldDecl *F) {
+      assert(F->getFieldIndex() == LastAddedFieldIndex + 1 &&
+             "Cannot aggregate non-contiguous fields.");
+      LastAddedFieldIndex = F->getFieldIndex();
+
+      // The 'first' and 'last' fields are chosen by offset, rather than field
+      // index. This allows the code to support bitfields, as well as regular
+      // fields.
+      uint64_t FOffset = RecLayout.getFieldOffset(F->getFieldIndex());
+      if (FOffset < FirstFieldOffset) {
+        FirstField = F;
+        FirstFieldOffset = FOffset;
+      } else if (FOffset > LastFieldOffset) {
+        LastField = F;
+        LastFieldOffset = FOffset;
+      }
+    }
+
+    const VarDecl *SrcRec;
+    const ASTRecordLayout &RecLayout;
+    FieldDecl *FirstField;
+    FieldDecl *LastField;
+    uint64_t FirstFieldOffset, LastFieldOffset;
+    unsigned LastAddedFieldIndex;
+  };
+
+  class ConstructorMemcpyizer : public FieldMemcpyizer {
+  private:
+
+    /// Get the source argument for a copy or move constructor. Returns null
+    /// if this is not an implicitly-defined copy or move constructor.
+    static const VarDecl* getTrivialCopySource(const CXXConstructorDecl *CD,
+                                               FunctionArgList &Args) {
+      if (CD->isCopyOrMoveConstructor() && CD->isImplicitlyDefined())
+        return Args[Args.size() - 1];
+      return 0;
+    }
+
+    // Returns true if a CXXCtorInitializer represents a member initialization
+    // that can be rolled into a memcpy.
+    bool isMemberInitMemcpyable(CXXCtorInitializer *MemberInit) const {
+      if (!MemcpyableCtor)
+        return false;
+      FieldDecl *Field = MemberInit->getMember();
+      assert(Field != 0 && "No field for member init.");
+      QualType FieldType = Field->getType();
+      CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(MemberInit->getInit());
+
+      // Bail out on non-POD, non-trivially-constructible members.
+      if (!(CE && CE->getConstructor()->isTrivial()) &&
+          !(FieldType.isTriviallyCopyableType(CGF.getContext()) ||
+            FieldType->isReferenceType()))
+        return false;
+
+      // Bail out on volatile fields.
+      if (!isMemcpyableField(Field))
+        return false;
+
+      // Otherwise we're good.
+      return true;
+    }
+
+  public:
+    ConstructorMemcpyizer(CodeGenFunction &CGF, const CXXConstructorDecl *CD,
+                          FunctionArgList &Args)
+      : FieldMemcpyizer(CGF, CD->getParent(), getTrivialCopySource(CD, Args)),
+        ConstructorDecl(CD),
+        MemcpyableCtor(CD->isImplicitlyDefined() &&
+                       CD->isCopyOrMoveConstructor() &&
+                       CGF.getLangOpts().getGC() == LangOptions::NonGC),
+        Args(Args) { }
+
+    void addMemberInitializer(CXXCtorInitializer *MemberInit) {
+      if (isMemberInitMemcpyable(MemberInit)) {
+        AggregatedInits.push_back(MemberInit);
+        addMemcpyableField(MemberInit->getMember());
+      } else {
+        emitAggregatedInits();
+        EmitMemberInitializer(CGF, ConstructorDecl->getParent(), MemberInit,
+                              ConstructorDecl, Args);
+      }
+    }
+
+    void emitAggregatedInits() {
+      if (AggregatedInits.size() <= 1) {
+        // This memcpy is too small to be worthwhile. Fall back on default
+        // codegen.
+        for (unsigned i = 0; i < AggregatedInits.size(); ++i) {
+          EmitMemberInitializer(CGF, ConstructorDecl->getParent(),
+                                AggregatedInits[i], ConstructorDecl, Args);
+        }
+        reset();
+        return;
+      }
+
+      pushEHDestructors();
+      emitMemcpy();
+      AggregatedInits.clear();
+    }
+
+    void pushEHDestructors() {
+      llvm::Value *ThisPtr = CGF.LoadCXXThis();
+      QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl);
+      LValue LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
+
+      for (unsigned i = 0; i < AggregatedInits.size(); ++i) {
+        QualType FieldType = AggregatedInits[i]->getMember()->getType();
+        QualType::DestructionKind dtorKind = FieldType.isDestructedType();
+        if (CGF.needsEHCleanup(dtorKind))
+          CGF.pushEHDestroy(dtorKind, LHS.getAddress(), FieldType);
+      }
+    }
+
+    void finish() {
+      emitAggregatedInits();
+    }
+
+  private:
+    const CXXConstructorDecl *ConstructorDecl;
+    bool MemcpyableCtor;
+    FunctionArgList &Args;
+    SmallVector<CXXCtorInitializer*, 16> AggregatedInits;
+  };
+
+  class AssignmentMemcpyizer : public FieldMemcpyizer {
+  private:
+
+    // Returns the memcpyable field copied by the given statement, if one
+    // exists. Otherwise returns null.
+    FieldDecl* getMemcpyableField(Stmt *S) {
+      if (!AssignmentsMemcpyable)
+        return 0;
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(S)) {
+        // Recognise trivial assignments.
+        if (BO->getOpcode() != BO_Assign)
+          return 0;
+        MemberExpr *ME = dyn_cast<MemberExpr>(BO->getLHS());
+        if (!ME)
+          return 0;
+        FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
+        if (!Field || !isMemcpyableField(Field))
+          return 0;
+        Stmt *RHS = BO->getRHS();
+        if (ImplicitCastExpr *EC = dyn_cast<ImplicitCastExpr>(RHS))
+          RHS = EC->getSubExpr();
+        if (!RHS)
+          return 0;
+        MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS);
+        if (!ME2 || dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field)
+          return 0;
+        return Field;
+      } else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) {
+        CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl());
+        if (!(MD && (MD->isCopyAssignmentOperator() ||
+                     MD->isMoveAssignmentOperator()) &&
+              MD->isTrivial()))
+          return 0;
+        MemberExpr *IOA = dyn_cast<MemberExpr>(MCE->getImplicitObjectArgument());
+        if (!IOA)
+          return 0;
+        FieldDecl *Field = dyn_cast<FieldDecl>(IOA->getMemberDecl());
+        if (!Field || !isMemcpyableField(Field))
+          return 0;
+        MemberExpr *Arg0 = dyn_cast<MemberExpr>(MCE->getArg(0));
+        if (!Arg0 || Field != dyn_cast<FieldDecl>(Arg0->getMemberDecl()))
+          return 0;
+        return Field;
+      } else if (CallExpr *CE = dyn_cast<CallExpr>(S)) {
+        FunctionDecl *FD = dyn_cast<FunctionDecl>(CE->getCalleeDecl());
+        if (!FD || FD->getBuiltinID() != Builtin::BI__builtin_memcpy)
+          return 0;
+        Expr *DstPtr = CE->getArg(0);
+        if (ImplicitCastExpr *DC = dyn_cast<ImplicitCastExpr>(DstPtr))
+          DstPtr = DC->getSubExpr();
+        UnaryOperator *DUO = dyn_cast<UnaryOperator>(DstPtr);
+        if (!DUO || DUO->getOpcode() != UO_AddrOf)
+          return 0;
+        MemberExpr *ME = dyn_cast<MemberExpr>(DUO->getSubExpr());
+        if (!ME)
+          return 0;
+        FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
+        if (!Field || !isMemcpyableField(Field))
+          return 0;
+        Expr *SrcPtr = CE->getArg(1);
+        if (ImplicitCastExpr *SC = dyn_cast<ImplicitCastExpr>(SrcPtr))
+          SrcPtr = SC->getSubExpr();
+        UnaryOperator *SUO = dyn_cast<UnaryOperator>(SrcPtr);
+        if (!SUO || SUO->getOpcode() != UO_AddrOf)
+          return 0;
+        MemberExpr *ME2 = dyn_cast<MemberExpr>(SUO->getSubExpr());
+        if (!ME2 || Field != dyn_cast<FieldDecl>(ME2->getMemberDecl()))
+          return 0;
+        return Field;
+      }
+
+      return 0;
+    }
+
+    bool AssignmentsMemcpyable;
+    SmallVector<Stmt*, 16> AggregatedStmts;
+
+  public:
+
+    AssignmentMemcpyizer(CodeGenFunction &CGF, const CXXMethodDecl *AD,
+                         FunctionArgList &Args)
+      : FieldMemcpyizer(CGF, AD->getParent(), Args[Args.size() - 1]),
+        AssignmentsMemcpyable(CGF.getLangOpts().getGC() == LangOptions::NonGC) {
+      assert(Args.size() == 2);
+    }
+
+    void emitAssignment(Stmt *S) {
+      FieldDecl *F = getMemcpyableField(S);
+      if (F) {
+        addMemcpyableField(F);
+        AggregatedStmts.push_back(S);
+      } else {
+        emitAggregatedStmts();
+        CGF.EmitStmt(S);
+      }
+    }
+
+    void emitAggregatedStmts() {
+      if (AggregatedStmts.size() <= 1) {
+        for (unsigned i = 0; i < AggregatedStmts.size(); ++i)
+          CGF.EmitStmt(AggregatedStmts[i]);
+        reset();
+      }
+
+      emitMemcpy();
+      AggregatedStmts.clear();
+    }
+
+    void finish() {
+      emitAggregatedStmts();
+    }
+  };
+
+}
+
 /// EmitCtorPrologue - This routine generates necessary code to initialize
 /// base classes and non-static data members belonging to this constructor.
 void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
@@ -770,8 +1107,10 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
 
   InitializeVTablePointers(ClassDecl);
 
+  ConstructorMemcpyizer CM(*this, CD, Args);
   for (unsigned I = 0, E = MemberInitializers.size(); I != E; ++I)
-    EmitMemberInitializer(*this, ClassDecl, MemberInitializers[I], CD, Args);
+    CM.addMemberInitializer(MemberInitializers[I]);
+  CM.finish();
 }
 
 static bool
@@ -940,6 +1279,24 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
     ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true);
 }
 
+void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) {
+  const CXXMethodDecl *AssignOp = cast<CXXMethodDecl>(CurGD.getDecl());
+  const Stmt *RootS = AssignOp->getBody();
+  assert(isa<CompoundStmt>(RootS) &&
+         "Body of an implicit assignment operator should be compound stmt.");
+  const CompoundStmt *RootCS = cast<CompoundStmt>(RootS);
+
+  LexicalScope Scope(*this, RootCS->getSourceRange());
+
+  AssignmentMemcpyizer AM(*this, AssignOp, Args);
+  for (CompoundStmt::const_body_iterator I = RootCS->body_begin(),
+                                         E = RootCS->body_end();
+       I != E; ++I) {
+    AM.emitAssignment(*I);
+  }
+  AM.finish();
+}
+
 namespace {
   /// Call the operator delete associated with the current destructor.
   struct CallDtorDelete : EHScopeStack::Cleanup {
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index ec139df..86c79ae 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -559,6 +559,11 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
     // The lambda "__invoke" function is special, because it forwards or
     // clones the body of the function call operator (but is actually static).
     EmitLambdaStaticInvokeFunction(cast<CXXMethodDecl>(FD));
+  } else if (FD->isDefaulted() && isa<CXXMethodDecl>(FD) &&
+             cast<CXXMethodDecl>(FD)->isCopyAssignmentOperator()) {
+    // Implicit copy-assignment gets the same special treatment as implicit
+    // copy-constructors.
+ emitImplicitAssignmentOperatorBody(Args); } else EmitFunctionBody(Args); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index d43b588..fcdd31f 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1402,6 +1402,7 @@ public: void EmitConstructorBody(FunctionArgList &Args); void EmitDestructorBody(FunctionArgList &Args); + void emitImplicitAssignmentOperatorBody(FunctionArgList &Args); void EmitFunctionBody(FunctionArgList &Args); void EmitForwardingCallToLambda(const CXXRecordDecl *Lambda, diff --git a/clang/test/CodeGenCXX/copy-assign-synthesis-1.cpp b/clang/test/CodeGenCXX/copy-assign-synthesis-1.cpp index 46d0483..5d09b54 100644 --- a/clang/test/CodeGenCXX/copy-assign-synthesis-1.cpp +++ b/clang/test/CodeGenCXX/copy-assign-synthesis-1.cpp @@ -96,14 +96,8 @@ int main() { // CHECK-LP64: .globl __ZN1XaSERKS_ // CHECK-LP64: .weak_definition __ZN1XaSERKS_ // CHECK-LP64: __ZN1XaSERKS_: -// CHECK-LP64: .globl __ZN1QaSERKS_ -// CHECK-LP64: .weak_definition __ZN1QaSERKS_ -// CHECK-LP64: __ZN1QaSERKS_: // CHECK-LP32: .globl __ZN1XaSERKS_ // CHECK-LP32: .weak_definition __ZN1XaSERKS_ // CHECK-LP32: __ZN1XaSERKS_: -// CHECK-LP32: .globl __ZN1QaSERKS_ -// CHECK-LP32: .weak_definition __ZN1QaSERKS_ -// CHECK-LP32: __ZN1QaSERKS_: diff --git a/clang/test/CodeGenCXX/implicit-copy-assign-operator.cpp b/clang/test/CodeGenCXX/implicit-copy-assign-operator.cpp index 0ec89fc..79586fb 100644 --- a/clang/test/CodeGenCXX/implicit-copy-assign-operator.cpp +++ b/clang/test/CodeGenCXX/implicit-copy-assign-operator.cpp @@ -44,7 +44,7 @@ void test_D(D d1, D d2) { // CHECK: {{call.*_ZN1AaSERS_}} // CHECK: {{call.*_ZN1BaSERS_}} // CHECK: {{call.*_ZN1CaSERKS_}} -// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 24}} +// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 28}} // CHECK: {{call.*_ZN1BaSERS_}} // CHECK: br // CHECK: {{call.*_ZN1CaSERKS_}} diff --git a/clang/test/CodeGenCXX/implicit-copy-constructor.cpp b/clang/test/CodeGenCXX/implicit-copy-constructor.cpp index 8a3a422..24e84d5 100644 --- a/clang/test/CodeGenCXX/implicit-copy-constructor.cpp +++ b/clang/test/CodeGenCXX/implicit-copy-constructor.cpp @@ -46,7 +46,7 @@ void f(D d) { // CHECK: call void @_ZN1AD1Ev // CHECK: call void @_ZN1AC2ERS_ // CHECK: call void @_ZN1BC2ERS_ -// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 24}} +// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 28}} // CHECK: call void @_ZN1BC1ERS_ // CHECK: br // CHECK: {{icmp ult.*, 2}} @@ -54,8 +54,7 @@ void f(D d) { // CHECK: call void @_ZN1AC1Ev // CHECK: call void @_ZN1CC1ERS_1A // CHECK: call void @_ZN1AD1Ev -// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 288}} -// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 12}} +// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 300}} // CHECK: ret void -- 2.7.4
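
An illustrative example of the pattern this change targets (the names below
are mine, not taken from the patch or its tests). Given a class with a
non-POD member sandwiched between POD members:

    struct NonPOD {
      NonPOD();
      NonPOD(const NonPOD &);
      NonPOD &operator=(const NonPOD &);
    };

    struct S {
      int    x;     // x, y and z are adjacent memcpyable fields: the
      double y;     // implicit copy operations now cover all three
      int    z[4];  // with a single memcpy.
      NonPOD np;    // Copied via NonPOD's own copy ctor/operator=.
      int    w;     // A lone trailing field fails the size() <= 1 test
    };              // and falls back on an element-wise copy.

    void copy(S &a, const S &b) { a = b; }  // implicit S::operator=

Before this patch, the presence of np forced every member of S, including the
POD run x, y, z, to be copied element-wise. With it, the memcpyizers
aggregate x, y and z into one memcpy, emit the call to NonPOD's operator=
(or copy constructor), and emit w's copy directly.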
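A worked instance of FieldMemcpyizer::getMemcpySize(), again illustrative
rather than taken from the commit:

    struct P { int a; int b; double c; };  // bit offsets 0, 32, 64 (Itanium)

Aggregating all three fields gives FirstFieldOffset = 0, LastFieldOffset = 64
and LastFieldSize = 64 bits, so MemcpySizeBits = 64 + 64 - 0 + 8 - 1 = 135,
and toCharUnitsFromBits truncates 135 / 8 to a 16-byte memcpy, exactly
sizeof(P). The "+ getCharWidth() - 1" term only changes the result when the
aggregated run ends mid-byte, which can happen when the last field is a
bitfield.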