Ref #458: Backport, sandybridge uses nehalem zgemm kernel
author Werner Saar <wernsaar@googlemail.com>
Mon, 22 Dec 2014 16:01:18 +0000 (17:01 +0100)
committer Werner Saar <wernsaar@googlemail.com>
Mon, 22 Dec 2014 16:01:18 +0000 (17:01 +0100)
kernel/x86_64/KERNEL.SANDYBRIDGE
param.h

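diff --git a/kernel/x86_64/KERNEL.SANDYBRIDGE b/kernel/x86_64/KERNEL.SANDYBRIDGE
index 61e13a1..ff96cd0 100644 (file)
--- a/kernel/x86_64/KERNEL.SANDYBRIDGE
+++ b/kernel/x86_64/KERNEL.SANDYBRIDGE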
@@ -34,17 +34,17 @@ CGEMMITCOPYOBJ =  cgemm_itcopy$(TSUFFIX).$(SUFFIX)
 CGEMMONCOPYOBJ =  cgemm_oncopy$(TSUFFIX).$(SUFFIX)
 CGEMMOTCOPYOBJ =  cgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
-
-ZGEMMKERNEL    =  zgemm_kernel_4x4_sandy.S
-ZGEMMINCOPY    =
-ZGEMMITCOPY    =
+ZGEMMKERNEL    =  zgemm_kernel_1x4_nehalem.S
+ZGEMMINCOPY    =  zgemm_ncopy_1.S
+ZGEMMITCOPY    =  zgemm_tcopy_1.S
 ZGEMMONCOPY    =  ../generic/zgemm_ncopy_4.c
 ZGEMMOTCOPY    =  ../generic/zgemm_tcopy_4.c
-ZGEMMINCOPYOBJ =
-ZGEMMITCOPYOBJ =
+ZGEMMINCOPYOBJ =  zgemm_incopy$(TSUFFIX).$(SUFFIX)
+ZGEMMITCOPYOBJ =  zgemm_itcopy$(TSUFFIX).$(SUFFIX)
 ZGEMMONCOPYOBJ =  zgemm_oncopy$(TSUFFIX).$(SUFFIX)
 ZGEMMOTCOPYOBJ =  zgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
+
 #STRSMKERNEL_LN        =  trsm_kernel_LN_4x8_nehalem.S
 #STRSMKERNEL_LT        =  trsm_kernel_LT_4x8_nehalem.S
 #STRSMKERNEL_RN        =  trsm_kernel_LT_4x8_nehalem.S
diff --git a/param.h b/param.h
index 28ed91e..bce05c9 100644 (file)
--- a/param.h
+++ b/param.h
@@ -1129,7 +1129,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define DGEMM_DEFAULT_UNROLL_M 8
 #define QGEMM_DEFAULT_UNROLL_M 2
 #define CGEMM_DEFAULT_UNROLL_M 8
-#define ZGEMM_DEFAULT_UNROLL_M 4
+#define ZGEMM_DEFAULT_UNROLL_M 1
 #define XGEMM_DEFAULT_UNROLL_M 1
 
 #define SGEMM_DEFAULT_UNROLL_N 4