From 2cb4d133f5e36ffe0202d04103775dbe545775fe Mon Sep 17 00:00:00 2001 From: Roman Gareev Date: Mon, 25 Jul 2016 07:27:59 +0000 Subject: [PATCH] [NFC] Refactor creation of the BLIS micro-kernel and improve documentation Reviewed-by: Tobias Grosser llvm-svn: 276616 --- polly/include/polly/ScheduleOptimizer.h | 25 +++++++++++++++++++++++ polly/lib/Transform/ScheduleOptimizer.cpp | 33 ++++++++++++++++++++++++++----- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/polly/include/polly/ScheduleOptimizer.h b/polly/include/polly/ScheduleOptimizer.h index d8bcbb2..f41fe1d 100644 --- a/polly/include/polly/ScheduleOptimizer.h +++ b/polly/include/polly/ScheduleOptimizer.h @@ -20,6 +20,16 @@ struct isl_schedule; struct isl_schedule_node; struct isl_union_map; +/// @brief Parameters of the micro kernel. +/// +/// Parameters, which determine sizes of rank-1 (i.e., outer product) update +/// used in the optimized matrix multiplication. +/// +struct MicroKernelParamsTy { + int Mr; + int Nr; +}; + namespace polly { extern bool DisablePollyTiling; class Scop; @@ -232,6 +242,21 @@ private: /// /// @param Node The node to check. static bool isMatrMultPattern(__isl_keep isl_schedule_node *Node); + + /// @brief Create the BLIS micro-kernel. + /// + /// We create the BLIS micro-kernel by applying a combination of tiling + /// of dimensions of the band node and interchanging of two innermost + /// modified dimensions. The values passed in MicroKernelParams are used + /// as tile sizes. + /// + /// @param Node The schedule node to be modified. + /// @param MicroKernelParams Parameters of the micro kernel + /// to be used as tile sizes. 
+ /// @see MicroKernelParamsTy + static __isl_give isl_schedule_node * + createMicroKernel(__isl_take isl_schedule_node *Node, + MicroKernelParamsTy MicroKernelParams); }; #endif diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index 22ae87e..e1689e5 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -493,10 +493,27 @@ static __isl_give isl_map *circularShiftOutputDims(__isl_take isl_map *IslMap) { return isl_map_set_tuple_id(IslMap, isl_dim_in, InputDimsId); } -__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern( - __isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI) { +__isl_give isl_schedule_node *ScheduleTreeOptimizer::createMicroKernel( + __isl_take isl_schedule_node *Node, MicroKernelParamsTy MicroKernelParams) { + return applyRegisterTiling(Node, {MicroKernelParams.Mr, MicroKernelParams.Nr}, + 1); +} + +/// Get parameters of the BLIS micro kernel. +/// +/// We choose the Mr and Nr parameters of the micro kernel to be large enough +/// such that no stalls caused by the combination of latencies and dependencies +/// are introduced during the updates of the resulting matrix of the matrix +/// multiplication. However, they should also be as small as possible to +/// release more registers for entries of multiplied matrices. +/// +/// @param TTI Target Transform Info. +/// @return The structure of type MicroKernelParamsTy. +/// @see MicroKernelParamsTy +static struct MicroKernelParamsTy +getMicroKernelParams(const llvm::TargetTransformInfo *TTI) { assert(TTI && "The target transform info should be provided."); - // Get a micro-kernel. + // Nvec - Number of double-precision floating-point numbers that can be hold // by a vector register. Use 2 by default. 
auto Nvec = TTI->getRegisterBitWidth(true) / 64; @@ -505,8 +522,14 @@ __isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern( int Nr = ceil(sqrt(Nvec * LatencyVectorFma * ThrougputVectorFma) / Nvec) * Nvec; int Mr = ceil(Nvec * LatencyVectorFma * ThrougputVectorFma / Nr); - std::vector MicroKernelParams{Mr, Nr}; - Node = applyRegisterTiling(Node, MicroKernelParams, 1); + return {Mr, Nr}; +} + +__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern( + __isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI) { + assert(TTI && "The target transform info should be provided."); + auto MicroKernelParams = getMicroKernelParams(TTI); + Node = createMicroKernel(Node, MicroKernelParams); return Node; } -- 2.7.4