From c22068c4060dba66dbdfeda28b57c0ac0fff5f82 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Fri, 24 Apr 2015 13:13:20 +0200 Subject: [PATCH] optimized sdot.c for increments != 1 --- kernel/x86_64/sdot.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/kernel/x86_64/sdot.c b/kernel/x86_64/sdot.c index c146590..a6da1fe 100644 --- a/kernel/x86_64/sdot.c +++ b/kernel/x86_64/sdot.c @@ -80,7 +80,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) if ( (inc_x == 1) && (inc_y == 1) ) { - int n1 = n & -32; + BLASLONG n1 = n & -32; if ( n1 ) sdot_kernel_16(n1, x, y , &dot ); @@ -99,6 +99,18 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) } + BLASLONG n1 = n & -2; + + while(i < n1) + { + + dot += y[iy] * x[ix] + y[iy+inc_y] * x[ix+inc_x]; + ix += inc_x*2 ; + iy += inc_y*2 ; + i+=2 ; + + } + while(i < n) { -- 2.7.4