From bd5c6039c69482fcfa35a5861c11edc0c5b6f032 Mon Sep 17 00:00:00 2001 From: Roman Gareev Date: Wed, 21 Dec 2016 12:37:36 +0000 Subject: [PATCH] Align newly created arrays to the first level cache line boundary Aligning data to cache line boundaries helps to avoid the overhead associated with accessing it ([1]). This patch aligns newly created arrays and adds an option to specify the first level cache line size. By default we use 64 bytes, which is a typical cache line size ([2]). On an Intel Core i7-3820 (Sandy Bridge) with the following options, clang -O3 gemm.c -I utilities/ utilities/polybench.c -DPOLYBENCH_TIME -march=native -mllvm -polly -mllvm -polly-pattern-matching-based-opts=true -DPOLYBENCH_USE_SCALAR_LB -mllvm -polly-target-cache-level-associativity=8,8 -mllvm -polly-target-cache-level-sizes=32768,262144 -mllvm -polly-target-latency-vector-fma=8 it improves the performance from 11.303 GFlops/sec (39.247% of theoretical peak) to 12.63 GFlops/sec (43.8542% of theoretical peak). 
Refs.: [1] - http://www.alexonlinux.com/aligned-vs-unaligned-memory-access [2] - http://igoro.com/archive/gallery-of-processor-cache-effects/ Differential Revision: https://reviews.llvm.org/D28020 Reviewed-by: Tobias Grosser llvm-svn: 290253 --- polly/lib/CodeGen/IslNodeBuilder.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 74a308b..d162fc2 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -74,6 +74,11 @@ static cl::opt PollyGenerateExpressions( cl::desc("Generate AST expressions for unmodified and modified accesses"), cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); +static cl::opt PollyTargetFirstLevelCacheLineSize( + "polly-target-first-level-cache-line-size", + cl::desc("The size of the first level cache line size specified in bytes."), + cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory)); + __isl_give isl_ast_expr * IslNodeBuilder::getUpperBound(__isl_keep isl_ast_node *For, ICmpInst::Predicate &Predicate) { @@ -1269,8 +1274,8 @@ void IslNodeBuilder::allocateNewArrays() { auto InstIt = Builder.GetInsertBlock()->getParent()->getEntryBlock().getTerminator(); - Value *CreatedArray = - new AllocaInst(NewArrayType, SAI->getName(), &*InstIt); + auto *CreatedArray = new AllocaInst(NewArrayType, SAI->getName(), &*InstIt); + CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize); SAI->setBasePtr(CreatedArray); } } -- 2.7.4