Add files via upload
author wjc404 <52632443+wjc404@users.noreply.github.com>
Mon, 4 Nov 2019 12:10:12 +0000 (20:10 +0800)
committer GitHub <noreply@github.com>
Mon, 4 Nov 2019 12:10:12 +0000 (20:10 +0800)
kernel/x86_64/sgemm_kernel_16x4_skylakex_2.c

index 3646c7d..5d49123 100644 (file)
 #define COMPUTE_m16(ndim) \
     INIT_m16n##ndim\
     "movq %%r13,%4; movq %%r14,%1; leaq (%1,%%r12,2),%%r15; addq %%r12,%%r15; movq %2,%5;"\
-    "cmpq $16,%4; jb "#ndim"016162f;"\
+    "cmpq $18,%4; jb "#ndim"016162f;"\
     #ndim"016161:\n\t"\
     KERNEL_k1m16n##ndim\
     KERNEL_k1m16n##ndim\
+    KERNEL_k1m16n##ndim\
     "prefetcht1 (%5); prefetcht1 63(%5); addq %3,%5;"\
     KERNEL_k1m16n##ndim\
     KERNEL_k1m16n##ndim\
-    "prefetcht1 (%8); addq $"#ndim",%8;"\
-    "subq $4,%4; cmpq $16,%4; jnb "#ndim"016161b;"\
+    KERNEL_k1m16n##ndim\
+    "prefetcht1 (%8); addq $32,%8;"\
+    "subq $6,%4; cmpq $18,%4; jnb "#ndim"016161b;"\
     "movq %2,%5;"\
     #ndim"016162:\n\t"\
     "testq %4,%4; jz "#ndim"016163f;"\