From b5e5bc760e73d613c109519f962a2e62bfc53430 Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Fri, 8 Mar 2019 01:26:49 +0000 Subject: [PATCH] Variable auto-init: split out small arrays Summary: Following up with r355181, initialize small arrays as well. LLVM stage2 shows a tiny size gain. Reviewers: glider, pcc, kcc, rjmccall Subscribers: jkorous, dexonsmith, jdoerfert, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D58885 llvm-svn: 355660 --- clang/lib/CodeGen/CGDecl.cpp | 60 +++++++++++++++++++++------------ clang/test/CodeGenCXX/auto-var-init.cpp | 24 ++++++++----- 2 files changed, 53 insertions(+), 31 deletions(-) diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 4acfb15..c09077bb 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -970,12 +970,12 @@ static llvm::Value *shouldUseMemSetToInitialize(llvm::Constant *Init, return llvm::isBytewiseValue(Init); } -/// Decide whether we want to split a constant structure store into a sequence -/// of its fields' stores. This may cost us code size and compilation speed, -/// but plays better with store optimizations. -static bool shouldSplitStructStore(CodeGenModule &CGM, - uint64_t GlobalByteSize) { - // Don't break structures that occupy more than one cacheline. +/// Decide whether we want to split a constant structure or array store into a +/// sequence of its fields' stores. This may cost us code size and compilation +/// speed, but plays better with store optimizations. +static bool shouldSplitConstantStore(CodeGenModule &CGM, + uint64_t GlobalByteSize) { + // Don't break things that occupy more than one cacheline. 
uint64_t ByteSizeLimit = 64; if (CGM.getCodeGenOpts().OptimizationLevel == 0) return false; @@ -1203,9 +1203,9 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, CGBuilderTy &Builder, llvm::Constant *constant) { auto *Ty = constant->getType(); - bool isScalar = Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy() || - Ty->isFPOrFPVectorTy(); - if (isScalar) { + bool canDoSingleStore = Ty->isIntOrIntVectorTy() || + Ty->isPtrOrPtrVectorTy() || Ty->isFPOrFPVectorTy(); + if (canDoSingleStore) { Builder.CreateStore(constant, Loc, isVolatile); return; } @@ -1213,12 +1213,13 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, auto *Int8Ty = llvm::IntegerType::getInt8Ty(CGM.getLLVMContext()); auto *IntPtrTy = CGM.getDataLayout().getIntPtrType(CGM.getLLVMContext()); - // If the initializer is all or mostly the same, codegen with bzero / memset - // then do a few stores afterward. uint64_t ConstantSize = CGM.getDataLayout().getTypeAllocSize(Ty); if (!ConstantSize) return; auto *SizeVal = llvm::ConstantInt::get(IntPtrTy, ConstantSize); + + // If the initializer is all or mostly the same, codegen with bzero / memset + // then do a few stores afterward. if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) { Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, isVolatile); @@ -1232,6 +1233,7 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, return; } + // If the initializer is a repeated byte pattern, use memset. llvm::Value *Pattern = shouldUseMemSetToInitialize(constant, ConstantSize); if (Pattern) { uint64_t Value = 0x00; @@ -1245,20 +1247,34 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, return; } - llvm::StructType *STy = dyn_cast<llvm::StructType>(Ty); - // FIXME: handle the case when STy != Loc.getElementType(). - // FIXME: handle non-struct aggregate types. 
- if (STy && (STy == Loc.getElementType()) && - shouldSplitStructStore(CGM, ConstantSize)) { - for (unsigned i = 0; i != constant->getNumOperands(); i++) { - Address EltPtr = Builder.CreateStructGEP(Loc, i); - emitStoresForConstant( - CGM, D, EltPtr, isVolatile, Builder, - cast<llvm::Constant>(Builder.CreateExtractValue(constant, i))); + // If the initializer is small, use a handful of stores. + if (shouldSplitConstantStore(CGM, ConstantSize)) { + if (auto *STy = dyn_cast<llvm::StructType>(Ty)) { + // FIXME: handle the case when STy != Loc.getElementType(). + if (STy == Loc.getElementType()) { + for (unsigned i = 0; i != constant->getNumOperands(); i++) { + Address EltPtr = Builder.CreateStructGEP(Loc, i); + emitStoresForConstant( + CGM, D, EltPtr, isVolatile, Builder, + cast<llvm::Constant>(Builder.CreateExtractValue(constant, i))); + } + return; + } + } else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) { + // FIXME: handle the case when ATy != Loc.getElementType(). + if (ATy == Loc.getElementType()) { + for (unsigned i = 0; i != ATy->getNumElements(); i++) { + Address EltPtr = Builder.CreateConstArrayGEP(Loc, i); + emitStoresForConstant( + CGM, D, EltPtr, isVolatile, Builder, + cast<llvm::Constant>(Builder.CreateExtractValue(constant, i))); + } + return; + } } - return; } + // Copy from a global. 
Builder.CreateMemCpy( Loc, createUnnamedGlobalFrom(CGM, D, Builder, constant, Loc.getAlignment()), diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp index 04f147c..821387e 100644 --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -129,7 +129,6 @@ struct arraytail { int i; int arr[]; }; // PATTERN-O1-NOT: @__const.test_bool4_custom.custom // ZERO-O1-NOT: @__const.test_bool4_custom.custom -// PATTERN: @__const.test_intptr4_uninit.uninit = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*)], align 16 // PATTERN: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16 // ZERO: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16 // PATTERN-O0: @__const.test_tailpad4_uninit.uninit = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }], align 16 @@ -1019,13 +1018,20 @@ TEST_CUSTOM(bool4, bool[4], { true, true, true, true }); // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) TEST_UNINIT(intptr4, int*[4]); -// CHECK-LABEL: @test_intptr4_uninit() -// CHECK: %uninit = alloca [4 x i32*], align -// CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) -// PATTERN-LABEL: 
@test_intptr4_uninit() -// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit -// ZERO-LABEL: @test_intptr4_uninit() -// ZERO: call void @llvm.memset{{.*}}, i8 0, +// CHECK-LABEL: @test_intptr4_uninit() +// CHECK: %uninit = alloca [4 x i32*], align +// CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) +// PATTERN-O1-LABEL: @test_intptr4_uninit() +// PATTERN-O1: %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 0 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %1, align 16 +// PATTERN-O1-NEXT: %2 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 1 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %2, align 8 +// PATTERN-O1-NEXT: %3 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 2 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %3, align 16 +// PATTERN-O1-NEXT: %4 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 3 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %4, align 8 +// ZERO-LABEL: @test_intptr4_uninit() +// ZERO: call void @llvm.memset{{.*}}, i8 0, TEST_BRACES(intptr4, int*[4]); // CHECK-LABEL: @test_intptr4_braces() @@ -1124,7 +1130,7 @@ TEST_UNINIT(atomicnotlockfree, _Atomic(notlockfree)); // PATTERN-LABEL: @test_atomicnotlockfree_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_atomicnotlockfree_uninit.uninit // PATTERN-O1: bitcast -// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}0, i8 -86, i64 32 +// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}, i8 -86, i64 32 // ZERO-LABEL: @test_atomicnotlockfree_uninit() // ZERO: call void @llvm.memset{{.*}}, i8 0, -- 2.7.4