From 4c22828812a9d5f0962c836d4c8bf486fde4d9cb Mon Sep 17 00:00:00 2001 From: AbdelRauf Date: Fri, 30 Aug 2019 04:09:15 +0000 Subject: [PATCH] caxpy and cdot are using vec_vsx_ld --- kernel/power/caxpy.c | 67 +++++++++++++++++++++++++++++++--------------------- kernel/power/cdot.c | 52 ++++++++++++++++++++++------------------ 2 files changed, 69 insertions(+), 50 deletions(-) diff --git a/kernel/power/caxpy.c b/kernel/power/caxpy.c index 00f2ec5..0545766 100644 --- a/kernel/power/caxpy.c +++ b/kernel/power/caxpy.c @@ -27,6 +27,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" #ifndef HAVE_ASM_KERNEL #include + +#define offset_0 0 +#define offset_1 16 +#define offset_2 32 +#define offset_3 48 +#define offset_4 64 +#define offset_5 80 +#define offset_6 96 +#define offset_7 112 + static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) @@ -43,27 +53,28 @@ static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT #endif __vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr); - register __vector float *vy = (__vector float *) y; - register __vector float *vx = (__vector float *) x; + register __vector float *vptr_y = (__vector float *) y; + register __vector float *vptr_x = (__vector float *) x; BLASLONG i=0; - for (; i < n/2; i += 8) { + for(;i + +#define offset_0 0 +#define offset_1 16 +#define offset_2 32 +#define offset_3 48 + + + static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; static void cdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, float *dot) { __vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr); - register __vector float *vy = (__vector float *) y; - register __vector float *vx = (__vector float *) x; - BLASLONG i = 0; + register __vector float *vptr_y = (__vector float *) y; + register __vector float *vptr_x = (__vector float *) x; register __vector float vd_0 = { 0 }; register __vector float vd_1 = { 0 }; register __vector float vd_2 = { 0 }; @@ -41,26 +48,23 @@ static void cdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, float *dot) register __vector float vdd_0 = { 0 }; register __vector float vdd_1 = { 0 }; register __vector float vdd_2 = { 0 }; - register __vector float vdd_3 = { 0 }; - for (; i < n/2; i += 4) { - - register __vector float vyy_0 ; - register __vector float vyy_1 ; - register __vector float vyy_2 ; - register __vector float vyy_3 ; - - register __vector float vy_0 = vy[i]; - register __vector float vy_1 = vy[i + 1]; - register __vector float vy_2 = vy[i + 2]; - register __vector float vy_3 = vy[i + 3]; - register __vector float vx_0= vx[i]; - register __vector float vx_1 = vx[i + 1]; - register __vector float vx_2 = vx[i + 2]; - register __vector float vx_3 = vx[i + 3]; - vyy_0 = vec_perm(vy_0, vy_0, swap_mask); - vyy_1 = vec_perm(vy_1, vy_1, swap_mask); - vyy_2 = vec_perm(vy_2, vy_2, swap_mask); - vyy_3 = vec_perm(vy_3, vy_3, swap_mask); + register __vector float vdd_3 = { 0 }; + BLASLONG i=0; + for(;i