From 3a67daa95496f549aa23ab0991ef12353b8be50f Mon Sep 17 00:00:00 2001
From: Werner Saar
Date: Fri, 24 Apr 2015 10:56:55 +0200
Subject: [PATCH] optimized ddot.c for increments != 1

---
 kernel/x86_64/ddot.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/kernel/x86_64/ddot.c b/kernel/x86_64/ddot.c
index c17741c..0f77d5f 100644
--- a/kernel/x86_64/ddot.c
+++ b/kernel/x86_64/ddot.c
@@ -101,15 +101,40 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
 
 	}
 
+	FLOAT temp1 = 0.0;
+	FLOAT temp2 = 0.0;
+
+	BLASLONG n1 = n & -4;
+
+	while(i < n1)
+	{
+
+		FLOAT m1 = y[iy] * x[ix] ;
+		FLOAT m2 = y[iy+inc_y] * x[ix+inc_x] ;
+
+		FLOAT m3 = y[iy+2*inc_y] * x[ix+2*inc_x] ;
+		FLOAT m4 = y[iy+3*inc_y] * x[ix+3*inc_x] ;
+
+		ix += inc_x*4 ;
+		iy += inc_y*4 ;
+
+		temp1 += m1+m3;
+		temp2 += m2+m4;
+
+		i+=4 ;
+
+	}
+
 	while(i < n)
 	{
 
-		dot += y[iy] * x[ix] ;
+		temp1 += y[iy] * x[ix] ;
 		ix += inc_x ;
 		iy += inc_y ;
 		i++ ;
 
 	}
+	dot = temp1 + temp2;
 	return(dot);
 
 }
-- 
2.7.4