projects
/
platform
/
upstream
/
openblas.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
| inline |
side by side
(parent:
3fbc13e
)
updated haswell optimized sgemv_n kernel
author
wernsaar
<wernsaar@googlemail.com>
Tue, 5 Aug 2014 06:04:47 +0000
(08:04 +0200)
committer
wernsaar
<wernsaar@googlemail.com>
Tue, 5 Aug 2014 06:04:47 +0000
(08:04 +0200)
kernel/x86_64/sgemv_n_microk_haswell-2.c
patch
|
blob
|
history
diff --git
a/kernel/x86_64/sgemv_n_microk_haswell-2.c
b/kernel/x86_64/sgemv_n_microk_haswell-2.c
index
0bad0ec
..
b19db9e
100644
(file)
--- a/
kernel/x86_64/sgemv_n_microk_haswell-2.c
+++ b/
kernel/x86_64/sgemv_n_microk_haswell-2.c
@@
-35,6
+35,7
@@
static void sgemv_kernel_16x4( long n, float **ap, float *x, float *y)
__asm__ __volatile__
(
+ "vzeroupper \n\t"
"vbroadcastss (%2), %%ymm12 \n\t" // x0
"vbroadcastss 4(%2), %%ymm13 \n\t" // x1
"vbroadcastss 8(%2), %%ymm14 \n\t" // x2
@@
-64,6
+65,7
@@
static void sgemv_kernel_16x4( long n, float **ap, float *x, float *y)
"addq $16, %0 \n\t"
"subq $16, %1 \n\t"
"jnz .L01LOOP%= \n\t"
+ "vzeroupper \n\t"
:
: