Updated Haswell-optimized sgemv_n kernel
author: wernsaar <wernsaar@googlemail.com>
Tue, 5 Aug 2014 06:04:47 +0000 (08:04 +0200)
committer: wernsaar <wernsaar@googlemail.com>
Tue, 5 Aug 2014 06:04:47 +0000 (08:04 +0200)
kernel/x86_64/sgemv_n_microk_haswell-2.c

index 0bad0ec..b19db9e 100644 (file)
@@ -35,6 +35,7 @@ static void sgemv_kernel_16x4( long n, float **ap, float *x, float *y)
 
        __asm__  __volatile__
        (
+       "vzeroupper                      \n\t"
        "vbroadcastss    (%2), %%ymm12   \n\t"  // x0 
        "vbroadcastss   4(%2), %%ymm13   \n\t"  // x1 
        "vbroadcastss   8(%2), %%ymm14   \n\t"  // x2 
@@ -64,6 +65,7 @@ static void sgemv_kernel_16x4( long n, float **ap, float *x, float *y)
         "addq          $16, %0                       \n\t"
        "subq           $16, %1                       \n\t"             
        "jnz            .L01LOOP%=                    \n\t"
+       "vzeroupper                      \n\t"
 
        :
         :