Update dgemm_kernel_8x8_skylakex.c
author wjc404 <52632443+wjc404@users.noreply.github.com>
Tue, 15 Oct 2019 19:20:08 +0000 (03:20 +0800)
committer GitHub <noreply@github.com>
Tue, 15 Oct 2019 19:20:08 +0000 (03:20 +0800)
kernel/x86_64/dgemm_kernel_8x8_skylakex.c

index b4a87cb..69437e6 100644 (file)
@@ -1,4 +1,5 @@
 #include "common.h"
+#include <stdint.h>
 #include <immintrin.h>
 /* row-major c_block */
 /* 64-bit pointer registers: a_block_pointer,b_block_pointer,c_pointer;*/
     INNER_TRANS_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)\
     INNER_STORE_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)
 
-#define COMPUTE_m1n8 {\
-    __asm__ __volatile__(\
-    INNER_INIT_m1n8\
-    INNER_KERNELm1(8)\
-    INNER_SAVE_m1n8\
-    :"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\
-    :"zmm4","zmm5","zmm6","zmm7","zmm8","cc","memory","k1");\
-    c_pointer += 1;\
-}
-#define COMPUTE_m2n8 {\
-    __asm__ __volatile__(\
-    INNER_INIT_m2n8\
-    INNER_KERNELm2(8)\
-    INNER_SAVE_m2n8\
-    :"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\
-    :"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","cc","memory","k1");\
-    c_pointer += 2;\
-}
-#define COMPUTE_m4n8 {\
-    __asm__ __volatile__(\
-    INNER_INIT_m4n8\
-    INNER_KERNELm4(8)\
-    INNER_SAVE_m4n8\
-    :"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03),"Yk"(k01)\
-    :"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","cc","memory");\
-    c_pointer += 4;\
-}
-#define COMPUTE_m8n8 {\
-    __asm__ __volatile__(\
-    INNER_INIT_m8n8\
-    INNER_KERNELm8(8)\
-    INNER_SAVE_m8n8\
-    :"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03)\
-    :"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","zmm12","zmm13","zmm14","zmm15","cc","memory");\
-    c_pointer += 8;\
-}
-
 #define COMPUTE_n8 {\
     __asm__ __volatile__(\
     "movq %8,%%r14;movq %2,%%r13;"\