From 7fa7ea3e1e73a79edc6a9facaa573a5f94193827 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Tue, 5 Aug 2014 08:04:47 +0200 Subject: [PATCH] updated haswell optimized sgemv_n kernel --- kernel/x86_64/sgemv_n_microk_haswell-2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/x86_64/sgemv_n_microk_haswell-2.c b/kernel/x86_64/sgemv_n_microk_haswell-2.c index 0bad0ec..b19db9e 100644 --- a/kernel/x86_64/sgemv_n_microk_haswell-2.c +++ b/kernel/x86_64/sgemv_n_microk_haswell-2.c @@ -35,6 +35,7 @@ static void sgemv_kernel_16x4( long n, float **ap, float *x, float *y) __asm__ __volatile__ ( + "vzeroupper \n\t" "vbroadcastss (%2), %%ymm12 \n\t" // x0 "vbroadcastss 4(%2), %%ymm13 \n\t" // x1 "vbroadcastss 8(%2), %%ymm14 \n\t" // x2 @@ -64,6 +65,7 @@ static void sgemv_kernel_16x4( long n, float **ap, float *x, float *y) "addq $16, %0 \n\t" "subq $16, %1 \n\t" "jnz .L01LOOP%= \n\t" + "vzeroupper \n\t" : : -- 2.7.4