From 41fe6e864ed70860cda1b1ccef09b55caf41fec9 Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Thu, 3 Dec 2020 14:40:11 -0600 Subject: [PATCH] POWER10: Update param.h Increasing the values of DGEMM_DEFAULT_P and DGEMM_DEFAULT_Q improves DGEMM performance by ~10%. --- param.h | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/param.h b/param.h index 7789c83..ee5ad17 100644 --- a/param.h +++ b/param.h @@ -2388,7 +2388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif -#if defined(POWER9) || defined(POWER10) +#if defined(POWER9) #define SNUMOPT 16 #define DNUMOPT 8 @@ -2426,6 +2426,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #if defined(POWER10) +#define SNUMOPT 16 +#define DNUMOPT 8 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 65536 +#define GEMM_DEFAULT_ALIGN 0x0ffffUL + +#define SGEMM_DEFAULT_UNROLL_M 16 +#define SGEMM_DEFAULT_UNROLL_N 8 +#define DGEMM_DEFAULT_UNROLL_M 8 +#define DGEMM_DEFAULT_UNROLL_N 8 +#define CGEMM_DEFAULT_UNROLL_M 8 +#define CGEMM_DEFAULT_UNROLL_N 4 +#define ZGEMM_DEFAULT_UNROLL_M 8 +#define ZGEMM_DEFAULT_UNROLL_N 2 + +#define SGEMM_DEFAULT_P 832 +#define DGEMM_DEFAULT_P 320 +#define CGEMM_DEFAULT_P 512 +#define ZGEMM_DEFAULT_P 256 + +#define SGEMM_DEFAULT_Q 1026 +#define DGEMM_DEFAULT_Q 960 +#define CGEMM_DEFAULT_Q 1026 +#define ZGEMM_DEFAULT_Q 1026 + +#define SGEMM_DEFAULT_R 4096 +#define DGEMM_DEFAULT_R 4096 +#define CGEMM_DEFAULT_R 4096 +#define ZGEMM_DEFAULT_R 4096 + +#define SYMV_P 8 + #undef SBGEMM_DEFAULT_UNROLL_N #undef SBGEMM_DEFAULT_UNROLL_M #undef SBGEMM_DEFAULT_P @@ -2436,10 +2469,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define SBGEMM_DEFAULT_P 832 #define SBGEMM_DEFAULT_Q 1026 #define SBGEMM_DEFAULT_R 4096 -#undef DGEMM_DEFAULT_UNROLL_M -#undef DGEMM_DEFAULT_UNROLL_N -#define DGEMM_DEFAULT_UNROLL_M 8 -#define DGEMM_DEFAULT_UNROLL_N 8 #endif #if defined(SPARC) && defined(V7) -- 2.7.4