From: Xianyi Zhang Date: Mon, 28 Feb 2022 12:33:11 +0000 (+0800) Subject: Update RISC-V Intrinsic API. X-Git-Tag: upstream/0.3.21~1^2~42^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=968e1f51d80bce07678d60fe5e22de557bd798a3;p=platform%2Fupstream%2Fopenblas.git Update RISC-V Intrinsic API. --- diff --git a/Makefile.prebuild b/Makefile.prebuild index 399db95..71ea71d 100644 --- a/Makefile.prebuild +++ b/Makefile.prebuild @@ -46,7 +46,7 @@ TARGET_FLAGS = -mips64r6 endif ifeq ($(TARGET), C910V) -TARGET_FLAGS = -march=rv64gcvxthead -mabi=lp64v +TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d endif all: getarch_2nd diff --git a/Makefile.riscv64 b/Makefile.riscv64 index 15d7b05..ce91e03 100644 --- a/Makefile.riscv64 +++ b/Makefile.riscv64 @@ -1,4 +1,4 @@ ifeq ($(CORE), C910V) -CCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v -FCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v -static +CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 +FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static endif diff --git a/common_riscv64.h b/common_riscv64.h index 27f385d..7ddbe80 100644 --- a/common_riscv64.h +++ b/common_riscv64.h @@ -92,7 +92,7 @@ static inline int blas_quickdivide(blasint x, blasint y){ #define SEEK_ADDRESS #if defined(C910V) -#include +#include #endif #endif diff --git a/kernel/riscv64/amax_vector.c b/kernel/riscv64/amax_vector.c index 5312f9e..b778d3e 100644 --- a/kernel/riscv64/amax_vector.c +++ b/kernel/riscv64/amax_vector.c @@ -88,8 +88,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, e64,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v0) - :"v"(mask0), "f"(zero), "r"(gvl) + :"+vd"(v0) + :"vd"(mask0), "f"(zero), "r"(gvl) :"v0"); #else asm volatile( @@ -97,8 +97,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, e32,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v0) - :"v"(mask0), "f"(zero), "r"(gvl) + :"+vd"(v0) + :"vd"(mask0), "f"(zero), "r"(gvl) :"v0"); #endif @@ -113,8 +113,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, e64,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v1) - :"v"(mask1), "f"(zero), "r"(gvl) + :"+vd"(v1) + :"vd"(mask1), "f"(zero), "r"(gvl) :"v0"); #else asm volatile( @@ -122,8 +122,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, e32,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v1) - :"v"(mask1), "f"(zero), "r"(gvl) + :"+vd"(v1) + :"vd"(mask1), "f"(zero), "r"(gvl) :"v0"); #endif @@ -131,7 +131,8 @@ asm volatile( j += gvl*2; } v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_zero, gvl); - maxf = v_res[0]; + maxf = *((FLOAT*)&v_res); + //maxf = v_res[0]; } for(;j maxf) - maxf = v_res[0]; + if(*((FLOAT*)&v_res) > maxf) + maxf = *((FLOAT*)&v_res); j += gvl; } }else{ @@ -179,8 +180,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, e64,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v0) - :"v"(mask0), "f"(zero), "r"(gvl) + :"+vd"(v0) + :"vd"(mask0), "f"(zero), "r"(gvl) :"v0"); #else asm volatile( @@ -188,8 +189,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, e32,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v0) - :"v"(mask0), "f"(zero), "r"(gvl) + :"+vd"(v0) + :"vd"(mask0), "f"(zero), "r"(gvl) :"v0"); #endif @@ -204,8 +205,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, e64,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v1) - :"v"(mask1), "f"(zero), "r"(gvl) + :"+vd"(v1) + :"vd"(mask1), "f"(zero), "r"(gvl) :"v0"); #else asm volatile( @@ -213,8 +214,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, 
%3, e32,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v1) - :"v"(mask1), "f"(zero), "r"(gvl) + :"+vd"(v1) + :"vd"(mask1), "f"(zero), "r"(gvl) :"v0"); #endif @@ -223,7 +224,7 @@ asm volatile( ix += inc_xv*2; } v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_zero, gvl); - maxf = v_res[0]; + maxf = *((FLOAT*)&v_res); } for(;j maxf) - maxf = v_res[0]; + if(*((FLOAT*)&v_res) > maxf) + maxf = *((FLOAT*)&v_res); j += gvl; } } diff --git a/kernel/riscv64/amin_vector.c b/kernel/riscv64/amin_vector.c index ae2867e..fd2f83d 100644 --- a/kernel/riscv64/amin_vector.c +++ b/kernel/riscv64/amin_vector.c @@ -87,8 +87,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, e64,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v0) - :"v"(mask0), "f"(zero), "r"(gvl) + :"+vd"(v0) + :"vd"(mask0), "f"(zero), "r"(gvl) :"v0"); #else asm volatile( @@ -96,8 +96,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, e32,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v0) - :"v"(mask0), "f"(zero), "r"(gvl) + :"+vd"(v0) + :"vd"(mask0), "f"(zero), "r"(gvl) :"v0"); #endif v_min = VFMINVV_FLOAT(v_min, v0, gvl); @@ -111,8 +111,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, e64,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v1) - :"v"(mask1), "f"(zero), "r"(gvl) + :"+vd"(v1) + :"vd"(mask1), "f"(zero), "r"(gvl) :"v0"); #else asm volatile( @@ -120,8 +120,8 @@ asm volatile( "vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, e32,m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" - :"+v"(v1) - :"v"(mask1), "f"(zero), "r"(gvl) + :"+vd"(v1) + :"vd"(mask1), "f"(zero), "r"(gvl) :"v0"); #endif @@ -129,7 +129,7 @@ asm volatile( j += gvl*2; } v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - minf = v_res[0]; + minf = *((FLOAT*)&v_res); } for(;j +#include #define KERNEL8x4_I \ "addi t1, %[PB], 1*8 \n\t"\ diff --git a/kernel/riscv64/dot_vector.c b/kernel/riscv64/dot_vector.c index 1d92699..64efc6c 100644 --- a/kernel/riscv64/dot_vector.c +++ b/kernel/riscv64/dot_vector.c @@ -31,9 +31,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSETVL_MAX vsetvlmax_e32m1() #define FLOAT_V_T vfloat32m4_t #define FLOAT_V_T_M1 vfloat32m1_t +#define VFMVFS_FLOAT vfmv_f_s_f32m1_f32 #define VLEV_FLOAT vle_v_f32m4 #define VLSEV_FLOAT vlse_v_f32m4 -#define VFREDSUM_FLOAT vfredsum_vs_f32m4_f32m1 +#define VFREDSUM_FLOAT vfredosum_vs_f32m4_f32m1 #define VFMACCVV_FLOAT vfmacc_vv_f32m4 #define VFMVVF_FLOAT vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 vfmv_v_f_f32m1 @@ -43,9 +44,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VSETVL_MAX vsetvlmax_e64m1() #define FLOAT_V_T vfloat64m4_t #define FLOAT_V_T_M1 vfloat64m1_t +#define VFMVFS_FLOAT vfmv_f_s_f64m1_f64 #define VLEV_FLOAT vle_v_f64m4 #define VLSEV_FLOAT vlse_v_f64m4 -#define VFREDSUM_FLOAT vfredsum_vs_f64m4_f64m1 +#define VFREDSUM_FLOAT vfredusum_vs_f64m4_f64m1 #define VFMACCVV_FLOAT vfmacc_vv_f64m4 #define VFMVVF_FLOAT vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 vfmv_v_f_f64m1 @@ -81,7 +83,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) } if(j > 0){ v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - dot += v_res[0]; + dot += (double)VFMVFS_FLOAT(v_res); } //tail if(j < n){ @@ -92,12 +94,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) //vr = VFDOTVV_FLOAT(vx, vy, gvl); vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - dot += v_res[0]; + dot += (double)VFMVFS_FLOAT(v_res); } }else if(inc_y == 1){ gvl = VSETVL(n); vr = VFMVVF_FLOAT(0, gvl); - unsigned int stride_x = inc_x * sizeof(FLOAT); + int stride_x = inc_x * sizeof(FLOAT); for(i=0,j=0; i 0){ v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - dot += v_res[0]; + dot += (double)VFMVFS_FLOAT(v_res); + } //tail if(j < n){ @@ -117,12 +120,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) //vr = VFDOTVV_FLOAT(vx, vy, gvl); vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - dot += v_res[0]; + dot += (double)VFMVFS_FLOAT(v_res); + } }else if(inc_x == 1){ gvl = VSETVL(n); vr = VFMVVF_FLOAT(0, gvl); - unsigned int stride_y = inc_y * sizeof(FLOAT); + int stride_y = inc_y * sizeof(FLOAT); for(i=0,j=0; i 0){ v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - dot += v_res[0]; + dot += (double)VFMVFS_FLOAT(v_res); + } //tail if(j < n){ @@ -142,13 +147,14 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) //vr = VFDOTVV_FLOAT(vx, vy, gvl); vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - dot += v_res[0]; + dot += (double)VFMVFS_FLOAT(v_res); + } }else{ gvl = VSETVL(n); vr = VFMVVF_FLOAT(0, gvl); - unsigned int stride_x = inc_x * sizeof(FLOAT); - unsigned int stride_y = inc_y * sizeof(FLOAT); + int stride_x = inc_x * sizeof(FLOAT); + int stride_y = inc_y * sizeof(FLOAT); for(i=0,j=0; i 0){ v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - dot += v_res[0]; + dot += (double)VFMVFS_FLOAT(v_res); + } //tail if(j < n){ @@ -168,7 +175,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) //vr = VFDOTVV_FLOAT(vx, vy, gvl); vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - dot += v_res[0]; + dot += (double)VFMVFS_FLOAT(v_res); + } } return(dot); diff --git a/kernel/riscv64/gemv_t_vector.c b/kernel/riscv64/gemv_t_vector.c index ceba107..7683641 100644 --- a/kernel/riscv64/gemv_t_vector.c +++ b/kernel/riscv64/gemv_t_vector.c @@ -31,9 +31,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSETVL_MAX vsetvlmax_e32m1() #define FLOAT_V_T vfloat32m4_t #define FLOAT_V_T_M1 vfloat32m1_t +#define VFMVFS_FLOAT vfmv_f_s_f32m1_f32 #define VLEV_FLOAT vle_v_f32m4 #define VLSEV_FLOAT vlse_v_f32m4 -#define VFREDSUM_FLOAT vfredsum_vs_f32m4_f32m1 +#define VFREDSUM_FLOAT vfredosum_vs_f32m4_f32m1 #define VFMACCVV_FLOAT vfmacc_vv_f32m4 #define VFMVVF_FLOAT vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 vfmv_v_f_f32m1 @@ -44,9 +45,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VSETVL_MAX vsetvlmax_e64m1() #define FLOAT_V_T vfloat64m4_t #define FLOAT_V_T_M1 vfloat64m1_t +#define VFMVFS_FLOAT vfmv_f_s_f64m1_f64 #define VLEV_FLOAT vle_v_f64m4 #define VLSEV_FLOAT vlse_v_f64m4 -#define VFREDSUM_FLOAT vfredsum_vs_f64m4_f64m1 +#define VFREDSUM_FLOAT vfredusum_vs_f64m4_f64m1 #define VFMACCVV_FLOAT vfmacc_vv_f64m4 #define VFMVVF_FLOAT vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 vfmv_v_f_f64m1 @@ -80,7 +82,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO j += gvl; } v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp = v_res[0]; + temp = (FLOAT)VFMVFS_FLOAT(v_res); if(j < m){ gvl = VSETVL(m-j); va = VLEV_FLOAT(&a_ptr[j], gvl); @@ -88,7 +90,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO vr = VFMULVV_FLOAT(va, vx, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp += v_res[0]; + temp += (FLOAT)VFMVFS_FLOAT(v_res); } y[iy] += alpha * temp; iy += inc_y; @@ -96,9 +98,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO } }else{ BLASLONG stride_x = inc_x * sizeof(FLOAT); - BLASLONG inc_xv = inc_x * gvl; + for(i = 0; i < n; i++){ gvl = VSETVL(m); + BLASLONG inc_xv = inc_x * gvl; j = 0; ix = 0; vr = VFMVVF_FLOAT(0, gvl); @@ -110,7 +113,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO ix += inc_xv; } v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp = v_res[0]; + temp = (FLOAT)VFMVFS_FLOAT(v_res); if(j < m){ gvl = VSETVL(m-j); va = VLEV_FLOAT(&a_ptr[j], gvl); @@ -118,7 +121,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO vr = VFMULVV_FLOAT(va, vx, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp += v_res[0]; + temp += (FLOAT)VFMVFS_FLOAT(v_res); } y[iy] += alpha * temp; iy += inc_y; diff --git a/kernel/riscv64/iamax_vector.c b/kernel/riscv64/iamax_vector.c index 056c0aa..ecb4cd7 100644 --- a/kernel/riscv64/iamax_vector.c +++ b/kernel/riscv64/iamax_vector.c @@ -117,10 +117,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) j += gvl; } v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl); - maxf = v_res[0]; + maxf = *((FLOAT*)&v_res); mask = VMFGEVF_FLOAT(v_max, maxf, gvl); max_index = VMFIRSTM(mask,gvl); - max_index = v_max_index[max_index]; + max_index = *((unsigned int*)&v_max_index+max_index); if(j < n){ gvl = VSETVL(n-j); @@ -130,7 +130,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v_max = VFRSUBVF_MASK_FLOAT(mask, vx, vx, 0, gvl); v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl); - FLOAT cur_maxf = v_res[0]; + FLOAT cur_maxf = *((FLOAT*)&v_res); if(cur_maxf > maxf){ //tail index v_max_index = VIDV_UINT(gvl); @@ -138,7 +138,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); max_index = VMFIRSTM(mask,gvl); - max_index = v_max_index[max_index]; + max_index = *((unsigned int*)&v_max_index+max_index); } } }else{ @@ -165,10 +165,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) idx += inc_v; } v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl); - maxf = v_res[0]; + maxf = *((FLOAT*)&v_res); mask = VMFGEVF_FLOAT(v_max, maxf, gvl); max_index = VMFIRSTM(mask,gvl); - max_index = v_max_index[max_index]; + max_index = *((unsigned int*)&v_max_index+max_index); if(j < n){ gvl = VSETVL(n-j); @@ -178,7 +178,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v_max = VFRSUBVF_MASK_FLOAT(mask, vx, vx, 0, gvl); v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl); - FLOAT cur_maxf = v_res[0]; + FLOAT cur_maxf = 
*((FLOAT*)&v_res); if(cur_maxf > maxf){ //tail index v_max_index = VIDV_UINT(gvl); @@ -186,7 +186,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); max_index = VMFIRSTM(mask,gvl); - max_index = v_max_index[max_index]; + max_index = *((unsigned int*)&v_max_index+max_index); } } } diff --git a/kernel/riscv64/iamin_vector.c b/kernel/riscv64/iamin_vector.c index 5bcffec..c72bb94 100644 --- a/kernel/riscv64/iamin_vector.c +++ b/kernel/riscv64/iamin_vector.c @@ -118,10 +118,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) j += gvl; } v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - minf = v_res[0]; + minf = *((FLOAT*)&v_res); mask = VMFLEVF_FLOAT(v_min, minf, gvl); min_index = VMFIRSTM(mask,gvl); - min_index = v_min_index[min_index]; + min_index = *((unsigned int*)&v_min_index+min_index); if(j < n){ gvl = VSETVL(n-j); @@ -131,7 +131,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v_min = VFRSUBVF_MASK_FLOAT(mask, vx, vx, 0, gvl); v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - FLOAT cur_minf = v_res[0]; + FLOAT cur_minf = *((FLOAT*)&v_res); if(cur_minf < minf){ //tail index v_min_index = VIDV_UINT(gvl); @@ -139,7 +139,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); min_index = VMFIRSTM(mask,gvl); - min_index = v_min_index[min_index]; + min_index = *((unsigned int*)&v_min_index+min_index); } } }else{ @@ -166,10 +166,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) idx += inc_v; } v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - minf = v_res[0]; + minf = *((FLOAT*)&v_res); mask = VMFLEVF_FLOAT(v_min, minf, gvl); min_index = VMFIRSTM(mask,gvl); - min_index = v_min_index[min_index]; + min_index = *((unsigned int*)&v_min_index+min_index); if(j < n){ gvl = VSETVL(n-j); @@ -179,7 +179,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v_min = VFRSUBVF_MASK_FLOAT(mask, vx, vx, 0, gvl); v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - FLOAT cur_minf = v_res[0]; + FLOAT cur_minf = *((FLOAT*)&v_res); if(cur_minf < minf){ //tail index v_min_index = VIDV_UINT(gvl); @@ -187,7 +187,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); min_index = VMFIRSTM(mask,gvl); - min_index = v_min_index[min_index]; + min_index = *((unsigned int*)&v_min_index+min_index); } } } diff --git a/kernel/riscv64/imax_vector.c b/kernel/riscv64/imax_vector.c index 42705f5..c2d787a 100644 --- a/kernel/riscv64/imax_vector.c +++ b/kernel/riscv64/imax_vector.c @@ -111,17 +111,17 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) j += gvl; } v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_min, gvl); - maxf = v_res[0]; + maxf = *((FLOAT*)&v_res); mask = VMFGEVF_FLOAT(v_max, maxf, gvl); max_index = VMFIRSTM(mask,gvl); - max_index = v_max_index[max_index]; + max_index = *((unsigned int*)&v_max_index+max_index); if(j < n){ gvl = VSETVL(n-j); v_max = VLEV_FLOAT(&x[j], gvl); v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_min, gvl); - FLOAT cur_maxf = v_res[0]; + FLOAT cur_maxf = *((FLOAT*)&v_res); if(cur_maxf > maxf){ //tail index v_max_index = VIDV_UINT(gvl); @@ -129,7 +129,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); max_index = VMFIRSTM(mask,gvl); - max_index = v_max_index[max_index]; + max_index = *((unsigned int*)&v_max_index+max_index); } } }else{ @@ -153,17 +153,17 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) idx += inc_v; } v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_min, 
gvl); - maxf = v_res[0]; + maxf = *((FLOAT*)&v_res); mask = VMFGEVF_FLOAT(v_max, maxf, gvl); max_index = VMFIRSTM(mask,gvl); - max_index = v_max_index[max_index]; + max_index = *((unsigned int*)&v_max_index+max_index); if(j < n){ gvl = VSETVL(n-j); v_max = VLSEV_FLOAT(&x[idx], stride_x, gvl); v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_min, gvl); - FLOAT cur_maxf = v_res[0]; + FLOAT cur_maxf = *((FLOAT*)&v_res); if(cur_maxf > maxf){ //tail index v_max_index = VIDV_UINT(gvl); @@ -171,7 +171,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); max_index = VMFIRSTM(mask,gvl); - max_index = v_max_index[max_index]; + max_index = *((unsigned int*)&v_max_index+max_index); } } } diff --git a/kernel/riscv64/imin_vector.c b/kernel/riscv64/imin_vector.c index 3afa74d..dfe9a33 100644 --- a/kernel/riscv64/imin_vector.c +++ b/kernel/riscv64/imin_vector.c @@ -129,24 +129,24 @@ asm volatile( j += gvl; } v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - minf = v_res[0]; + minf = *((FLOAT*)&v_res); mask = VMFLEVF_FLOAT(v_min, minf, gvl); min_index = VMFIRSTM(mask,gvl); - min_index = v_min_index[min_index]; + min_index = *((unsigned int*)&v_min_index+min_index); if(j < n){ gvl = VSETVL(n-j); v_min = VLEV_FLOAT(&x[j], gvl); v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - FLOAT cur_minf = v_res[0]; + FLOAT cur_minf = *((FLOAT*)&v_res); if(cur_minf < minf){ //tail index v_min_index = VIDV_UINT(gvl); v_min_index = VADDVX_UINT(v_min_index, j, gvl); mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); min_index = VMFIRSTM(mask,gvl); - min_index = v_min_index[min_index]; + min_index = *((unsigned int*)&v_min_index+min_index); } } }else{ @@ -190,24 +190,24 @@ asm volatile( idx += inc_v; } v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - minf = v_res[0]; + minf = *((FLOAT*)&v_res); mask = VMFLEVF_FLOAT(v_min, minf, gvl); min_index = VMFIRSTM(mask,gvl); - min_index = v_min_index[min_index]; + min_index = *((unsigned int*)&v_min_index+min_index); if(j < n){ gvl = VSETVL(n-j); v_min = VLSEV_FLOAT(&x[idx], stride_x, gvl); v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - FLOAT cur_minf = v_res[0]; + FLOAT cur_minf = *((FLOAT*)&v_res); if(cur_minf < minf){ //tail index v_min_index = VIDV_UINT(gvl); v_min_index = VADDVX_UINT(v_min_index, j, gvl); mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); min_index = VMFIRSTM(mask,gvl); - min_index = v_min_index[min_index]; + min_index = *((unsigned int*)&v_min_index+min_index); } } } diff --git a/kernel/riscv64/izamax_vector.c b/kernel/riscv64/izamax_vector.c index ddb5eab..fdbdc3a 100644 --- a/kernel/riscv64/izamax_vector.c +++ b/kernel/riscv64/izamax_vector.c @@ -34,6 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSETVL_MAX vsetvlmax_e64m1() #define FLOAT_V_T vfloat64m8_t #define FLOAT_V_T_M1 vfloat64m1_t +#define VFMVFS_FLOAT vfmv_f_s_f64m1_f64 #define VLSEV_FLOAT vlse_v_f64m8 #define VFREDMAXVS_FLOAT vfredmax_vs_f64m8_f64m1 #define MASK_T vbool8_t @@ -46,6 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VMFGEVF_FLOAT vmfge_vf_f64m8_b8 #define VMFIRSTM vmfirst_m_b8 #define UINT_V_T vuint64m8_t +#define VSEVU_UINT vse64_v_u64m8 +#define UINT_T long unsigned int #define VIDV_MASK_UINT vid_v_u64m8_m #define VIDV_UINT vid_v_u64m8 #define VADDVX_MASK_UINT vadd_vx_u64m8_m @@ -59,6 +62,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VSETVL_MAX vsetvlmax_e32m1() #define FLOAT_V_T vfloat32m8_t #define FLOAT_V_T_M1 vfloat32m1_t +#define VFMVFS_FLOAT vfmv_f_s_f32m1_f32 #define VLSEV_FLOAT vlse_v_f32m8 #define VFREDMAXVS_FLOAT vfredmax_vs_f32m8_f32m1 #define MASK_T vbool4_t @@ -71,6 +75,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VMFGEVF_FLOAT vmfge_vf_f32m8_b4 #define VMFIRSTM vmfirst_m_b4 #define UINT_V_T vuint32m8_t +#define UINT_T unsigned int +#define VSEVU_UINT vse32_v_u32m8 #define VIDV_MASK_UINT vid_v_u32m8_m #define VIDV_UINT vid_v_u32m8 #define VADDVX_MASK_UINT vadd_vx_u32m8_m @@ -98,6 +104,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v_z0 = VFMVVF_FLOAT_M1(0, gvl); gvl = VSETVL(n); + UINT_T temp_uint[gvl]; v_max_index = VMVVX_UINT(0, gvl); v_max = VFMVVF_FLOAT(-1, gvl); BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT); @@ -183,10 +190,12 @@ asm volatile( } vx0 = VFMVVF_FLOAT(0, gvl); v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl); - maxf = v_res[0]; + maxf = VFMVFS_FLOAT(v_res); mask0 = VMFGEVF_FLOAT(v_max, maxf, gvl); max_index = VMFIRSTM(mask0,gvl); - max_index = v_max_index[max_index]; + VSEVU_UINT(temp_uint,v_max_index,gvl); + max_index = temp_uint[max_index]; + if(j < n){ gvl = VSETVL(n-j); @@ -239,7 +248,7 @@ asm volatile( */ v_max = VFADDVV_FLOAT(vx0, vx1, gvl); v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl); - FLOAT cur_maxf = v_res[0]; + FLOAT cur_maxf = VFMVFS_FLOAT(v_res); if(cur_maxf > maxf){ //tail index v_max_index = VIDV_UINT(gvl); @@ -247,7 +256,9 @@ asm volatile( mask0 = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); max_index = VMFIRSTM(mask0,gvl); - max_index = v_max_index[max_index]; + VSEVU_UINT(temp_uint,v_max_index,gvl); + max_index = temp_uint[max_index]; + } } return(max_index+1); diff --git a/kernel/riscv64/izamin_vector.c b/kernel/riscv64/izamin_vector.c index 6e328dc..59c7203 100644 --- a/kernel/riscv64/izamin_vector.c +++ b/kernel/riscv64/izamin_vector.c @@ -35,6 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSETVL_MAX vsetvlmax_e64m1() #define FLOAT_V_T vfloat64m8_t #define FLOAT_V_T_M1 vfloat64m1_t +#define VFMVFS_FLOAT vfmv_f_s_f64m1_f64 #define VLSEV_FLOAT vlse_v_f64m8 #define VFREDMINVS_FLOAT vfredmin_vs_f64m8_f64m1 #define MASK_T vbool8_t @@ -47,6 +48,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VMFLEVF_FLOAT vmfle_vf_f64m8_b8 #define VMFIRSTM vmfirst_m_b8 #define UINT_V_T vuint64m8_t +#define VSEVU_UINT vse64_v_u64m8 +#define UINT_T long unsigned int #define VIDV_MASK_UINT vid_v_u64m8_m #define VIDV_UINT vid_v_u64m8 #define VADDVX_MASK_UINT vadd_vx_u64m8_m @@ -60,6 +63,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSETVL_MAX vsetvlmax_e32m1() #define FLOAT_V_T vfloat32m8_t #define FLOAT_V_T_M1 vfloat32m1_t +#define VFMVFS_FLOAT vfmv_f_s_f32m1_f32 #define VLSEV_FLOAT vlse_v_f32m8 #define VFREDMINVS_FLOAT vfredmin_vs_f32m8_f32m1 #define MASK_T vbool4_t @@ -72,6 +76,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VMFLEVF_FLOAT vmfle_vf_f32m8_b4 #define VMFIRSTM vmfirst_m_b4 #define UINT_V_T vuint32m8_t +#define UINT_T unsigned int +#define VSEVU_UINT vse32_v_u32m8 #define VIDV_MASK_UINT vid_v_u32m8_m #define VIDV_UINT vid_v_u32m8 #define VADDVX_MASK_UINT vadd_vx_u32m8_m @@ -98,6 +104,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v_max = VFMVVF_FLOAT_M1(FLT_MAX, gvl); gvl = VSETVL(n); + UINT_T temp_uint[gvl]; v_min_index = VMVVX_UINT(0, gvl); v_min = VFMVVF_FLOAT(FLT_MAX, gvl); BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT); @@ -182,10 +189,11 @@ asm volatile( ix += inc_xv; } v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - minf = v_res[0]; + minf = VFMVFS_FLOAT(v_res); mask0 = VMFLEVF_FLOAT(v_min, minf, gvl); min_index = VMFIRSTM(mask0,gvl); - min_index = v_min_index[min_index]; + VSEVU_UINT(temp_uint,v_min_index,gvl); + min_index = temp_uint[min_index]; if(j < n){ gvl = VSETVL(n-j); @@ -238,7 +246,7 @@ asm volatile( */ v_min = VFADDVV_FLOAT(vx0, vx1, gvl); v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - FLOAT cur_minf = v_res[0]; + FLOAT cur_minf = VFMVFS_FLOAT(v_res); if(cur_minf < minf){ //tail index v_min_index = VIDV_UINT(gvl); @@ -246,7 +254,9 @@ asm volatile( mask0 = VMFLEVF_FLOAT(v_min, cur_minf, gvl); min_index = VMFIRSTM(mask0,gvl); - min_index = v_min_index[min_index]; + VSEVU_UINT(temp_uint,v_min_index,gvl); + min_index = temp_uint[min_index]; + } } return(min_index+1); diff --git a/kernel/riscv64/max_vector.c b/kernel/riscv64/max_vector.c index 0fc59b7..b988513 100644 --- a/kernel/riscv64/max_vector.c +++ b/kernel/riscv64/max_vector.c @@ -77,14 +77,14 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) j += gvl * 2; } v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_min, gvl); - maxf = v_res[0]; + maxf = *((FLOAT*)&v_res); } for(;j maxf) - maxf = v_res[0]; + if(*((FLOAT*)&v_res) > maxf) + maxf = *((FLOAT*)&v_res); j += gvl; } }else{ @@ -103,14 +103,14 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) idx += inc_xv * 2; } v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_min, gvl); - maxf = v_res[0]; + maxf = *((FLOAT*)&v_res); } for(;j maxf) - maxf = v_res[0]; + if(*((FLOAT*)&v_res) > maxf) + maxf = *((FLOAT*)&v_res); j += gvl; } } diff --git a/kernel/riscv64/min_vector.c b/kernel/riscv64/min_vector.c index 8223fa8..be0803d 100644 --- a/kernel/riscv64/min_vector.c +++ b/kernel/riscv64/min_vector.c @@ -77,14 +77,14 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) j += gvl * 2; } v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - minf = v_res[0]; + minf = *((FLOAT*)&v_res); } for(;j +#include #define KERNEL16x4_I \ "addi t1, %[PB], 1*4 \n\t"\ diff --git a/kernel/riscv64/symv_L_vector.c b/kernel/riscv64/symv_L_vector.c index de89175..6588f4d 100644 --- a/kernel/riscv64/symv_L_vector.c +++ b/kernel/riscv64/symv_L_vector.c @@ -31,11 +31,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSETVL_MAX vsetvlmax_e32m1() #define FLOAT_V_T vfloat32m4_t #define FLOAT_V_T_M1 vfloat32m1_t +#define VFMVFS_FLOAT vfmv_f_s_f32m1_f32 #define VLEV_FLOAT vle_v_f32m4 #define VLSEV_FLOAT vlse_v_f32m4 #define VSEV_FLOAT vse_v_f32m4 #define VSSEV_FLOAT vsse_v_f32m4 -#define VFREDSUM_FLOAT vfredsum_vs_f32m4_f32m1 +#define VFREDSUM_FLOAT vfredusum_vs_f32m4_f32m1 #define VFMACCVV_FLOAT vfmacc_vv_f32m4 #define VFMACCVF_FLOAT vfmacc_vf_f32m4 #define VFMVVF_FLOAT vfmv_v_f_f32m4 @@ -46,11 +47,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VSETVL_MAX vsetvlmax_e64m1() #define FLOAT_V_T vfloat64m4_t #define FLOAT_V_T_M1 vfloat64m1_t +#define VFMVFS_FLOAT vfmv_f_s_f64m1_f64 #define VLEV_FLOAT vle_v_f64m4 #define VLSEV_FLOAT vlse_v_f64m4 #define VSEV_FLOAT vse_v_f64m4 #define VSSEV_FLOAT vsse_v_f64m4 -#define VFREDSUM_FLOAT vfredsum_vs_f64m4_f64m1 +#define VFREDSUM_FLOAT vfredusum_vs_f64m4_f64m1 #define VFMACCVV_FLOAT vfmacc_vv_f64m4 #define VFMACCVF_FLOAT vfmacc_vf_f64m4 #define VFMVVF_FLOAT vfmv_v_f_f64m4 @@ -98,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA i += gvl; } v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 = v_res[0]; + temp2 = VFMVFS_FLOAT(v_res); if(i < m){ gvl = VSETVL(m-i); vy = VLEV_FLOAT(&y[i], gvl); @@ -109,7 +111,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA vx = VLEV_FLOAT(&x[i], gvl); vr = VFMULVV_FLOAT(vx, va, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 += v_res[0]; + temp2 += VFMVFS_FLOAT(v_res); } } y[j] += alpha * temp2; @@ -143,7 +145,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA iy += inc_yv; } v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 = v_res[0]; + temp2 = VFMVFS_FLOAT(v_res); if(i < m){ gvl = VSETVL(m-i); vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); @@ -154,7 +156,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA vx = VLEV_FLOAT(&x[i], gvl); vr = VFMULVV_FLOAT(vx, va, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 += v_res[0]; + temp2 += VFMVFS_FLOAT(v_res); } } y[jy] += alpha * temp2; @@ -189,7 +191,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA ix += inc_xv; } v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 = v_res[0]; + temp2 = VFMVFS_FLOAT(v_res); if(i < m){ gvl = VSETVL(m-i); vy = VLEV_FLOAT(&y[i], gvl); @@ -200,7 +202,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); vr = VFMULVV_FLOAT(vx, va, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 += v_res[0]; + temp2 += VFMVFS_FLOAT(v_res); } } y[j] += alpha * temp2; @@ -240,7 +242,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA iy += inc_yv; } v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 = v_res[0]; + temp2 = VFMVFS_FLOAT(v_res); if(i < m){ gvl = VSETVL(m-i); vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); @@ -251,7 +253,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); vr = VFMULVV_FLOAT(vx, va, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 += v_res[0]; + temp2 += VFMVFS_FLOAT(v_res); } } y[jy] += alpha * temp2; diff --git a/kernel/riscv64/symv_U_vector.c b/kernel/riscv64/symv_U_vector.c index 7229a48..31104ea 100644 --- a/kernel/riscv64/symv_U_vector.c +++ b/kernel/riscv64/symv_U_vector.c @@ -31,11 +31,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VSETVL_MAX vsetvlmax_e32m1() #define FLOAT_V_T vfloat32m4_t #define FLOAT_V_T_M1 vfloat32m1_t +#define VFMVFS_FLOAT vfmv_f_s_f32m1_f32 #define VLEV_FLOAT vle_v_f32m4 #define VLSEV_FLOAT vlse_v_f32m4 #define VSEV_FLOAT vse_v_f32m4 #define VSSEV_FLOAT vsse_v_f32m4 -#define VFREDSUM_FLOAT vfredsum_vs_f32m4_f32m1 +#define VFREDSUM_FLOAT vfredusum_vs_f32m4_f32m1 #define VFMACCVV_FLOAT vfmacc_vv_f32m4 #define VFMACCVF_FLOAT vfmacc_vf_f32m4 #define VFMVVF_FLOAT vfmv_v_f_f32m4 @@ -47,11 +48,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSETVL_MAX vsetvlmax_e64m1() #define FLOAT_V_T vfloat64m4_t #define FLOAT_V_T_M1 vfloat64m1_t +#define VFMVFS_FLOAT vfmv_f_s_f64m1_f64 #define VLEV_FLOAT vle_v_f64m4 #define VLSEV_FLOAT vlse_v_f64m4 #define VSEV_FLOAT vse_v_f64m4 #define VSSEV_FLOAT vsse_v_f64m4 -#define VFREDSUM_FLOAT vfredsum_vs_f64m4_f64m1 +#define VFREDSUM_FLOAT vfredusum_vs_f64m4_f64m1 #define VFMACCVV_FLOAT vfmacc_vv_f64m4 #define VFMACCVF_FLOAT vfmacc_vf_f64m4 #define VFMVVF_FLOAT vfmv_v_f_f64m4 @@ -100,7 +102,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA i += gvl; } v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 = v_res[0]; + temp2 = VFMVFS_FLOAT(v_res); if(i < j){ gvl = VSETVL(j-i); vy = VLEV_FLOAT(&y[i], gvl); @@ -111,7 +113,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA vx = VLEV_FLOAT(&x[i], gvl); vr = VFMULVV_FLOAT(vx, va, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 += v_res[0]; + temp2 += VFMVFS_FLOAT(v_res); } } y[j] += temp1 * a_ptr[j] + alpha * temp2; @@ -144,7 +146,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA iy += inc_yv; } v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 = v_res[0]; + temp2 = VFMVFS_FLOAT(v_res); if(i < j){ gvl = VSETVL(j-i); vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); @@ -155,7 +157,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA vx = VLEV_FLOAT(&x[i], gvl); vr = VFMULVV_FLOAT(vx, va, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 += v_res[0]; + temp2 += VFMVFS_FLOAT(v_res); } } y[jy] += temp1 * a_ptr[j] + alpha * temp2; @@ -189,7 +191,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA ix += inc_xv; } v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 = v_res[0]; + temp2 = VFMVFS_FLOAT(v_res); if(i < j){ gvl = VSETVL(j-i); vy = VLEV_FLOAT(&y[i], gvl); @@ -200,7 +202,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); vr = VFMULVV_FLOAT(vx, va, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 += v_res[0]; + temp2 += VFMVFS_FLOAT(v_res); } } y[j] += temp1 * a_ptr[j] + alpha * temp2; @@ -239,7 +241,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA iy += inc_yv; } v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 = v_res[0]; + temp2 = VFMVFS_FLOAT(v_res); if(i < j){ gvl = VSETVL(j-i); vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); @@ -250,7 +252,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); vr = VFMULVV_FLOAT(vx, va, gvl); v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); - temp2 += v_res[0]; + temp2 += VFMVFS_FLOAT(v_res); } } y[jy] += temp1 * a_ptr[j] + alpha * temp2; diff --git a/kernel/riscv64/zamax_vector.c b/kernel/riscv64/zamax_vector.c index 5cd65b2..9dbeba9 100644 --- 
a/kernel/riscv64/zamax_vector.c +++ b/kernel/riscv64/zamax_vector.c @@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSETVL_MAX vsetvlmax_e32m1() #define FLOAT_V_T vfloat32m8_t #define FLOAT_V_T_M1 vfloat32m1_t +#define VFMVFS_FLOAT vfmv_f_s_f32m1_f32 #define VLSEV_FLOAT vlse_v_f32m8 #define VFREDMAXVS_FLOAT vfredmax_vs_f32m8_f32m1 #define MASK_T vbool4_t @@ -42,11 +43,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFRSUBVF_MASK_FLOAT vfrsub_vf_f32m8_m #define VFMAXVV_FLOAT vfmax_vv_f32m8 #define VFADDVV_FLOAT vfadd_vv_f32m8 + #else #define VSETVL(n) vsetvl_e64m8(n) #define VSETVL_MAX vsetvlmax_e64m1() #define FLOAT_V_T vfloat64m8_t #define FLOAT_V_T_M1 vfloat64m1_t +#define VFMVFS_FLOAT vfmv_f_s_f64m1_f64 #define VLSEV_FLOAT vlse_v_f64m8 #define VFREDMAXVS_FLOAT vfredmax_vs_f64m8_f64m1 #define MASK_T vbool8_t @@ -56,6 +59,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFRSUBVF_MASK_FLOAT vfrsub_vf_f64m8_m #define VFMAXVV_FLOAT vfmax_vv_f64m8 #define VFADDVV_FLOAT vfadd_vv_f64m8 + #endif FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) @@ -91,7 +95,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) ix += inc_xv; } v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl); - maxf = v_res[0]; + maxf = VFMVFS_FLOAT(v_res); if(j maxf) - maxf = v_res[0]; + + if(VFMVFS_FLOAT(v_res)> maxf) + maxf = VFMVFS_FLOAT(v_res); } return(maxf); } diff --git a/kernel/riscv64/zamin_vector.c b/kernel/riscv64/zamin_vector.c index 9d567b3..dc58075 100644 --- a/kernel/riscv64/zamin_vector.c +++ b/kernel/riscv64/zamin_vector.c @@ -34,6 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSETVL_MAX vsetvlmax_e32m1() #define FLOAT_V_T vfloat32m8_t #define FLOAT_V_T_M1 vfloat32m1_t +#define VFMVFS_FLOAT vfmv_f_s_f32m1_f32 #define VLSEV_FLOAT vlse_v_f32m8 #define VFREDMINVS_FLOAT vfredmin_vs_f32m8_f32m1 #define MASK_T vbool4_t @@ -48,6 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSETVL_MAX vsetvlmax_e32m1() #define FLOAT_V_T vfloat64m8_t #define FLOAT_V_T_M1 vfloat64m1_t +#define VFMVFS_FLOAT vfmv_f_s_f64m1_f64 #define VLSEV_FLOAT vlse_v_f64m8 #define VFREDMINVS_FLOAT vfredmin_vs_f64m8_f64m1 #define MASK_T vbool8_t @@ -92,7 +94,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) ix += inc_xv; } v_res = VFREDMINVS_FLOAT(v_res, v_min, v_max, gvl); - minf = v_res[0]; + minf = VFMVFS_FLOAT(v_res); if(j
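
The recurring changes in the hunks above: C910V builds move to the v0p7/xtheadc -march spellings with -mabi=lp64d, the reduction intrinsics are renamed from vfredsum_vs_* to the ordered/unordered forms vfredosum_vs_*/vfredusum_vs_*, and reduction results are no longer read by indexing the m1 vector value (v_res[0]) but are moved out with vfmv_f_s_* (wrapped as VFMVFS_FLOAT), read through a pointer cast, or, for index vectors, stored to a temporary array with vse*_v_u* first. A minimal sketch of the new read-out pattern follows, using the same pre-__riscv_ intrinsic spellings these kernels use (vsetvl_e32m4, vle_v_f32m4, vfredusum_vs_f32m4_f32m1, vfmv_f_s_f32m1_f32); it is an illustration under those assumptions, not code taken from the patch:

    #include <riscv_vector.h>

    /* Sum an array the way the updated kernels read reductions: reduce each
       chunk into an m1 register, then move the scalar out explicitly instead
       of indexing the vector value. */
    static float sum_f32(const float *x, long n)
    {
        float sum = 0.0f;
        long gvl = 0;
        /* zero scalar operand for the reduction, as the kernels' v_z0 */
        vfloat32m1_t v_z0 = vfmv_v_f_f32m1(0.0f, vsetvlmax_e32m1());
        for (long i = 0; i < n; i += gvl) {
            gvl = vsetvl_e32m4(n - i);
            vfloat32m4_t vx = vle_v_f32m4(&x[i], gvl);
            /* unordered reduction (renamed from vfredsum_vs_f32m4_f32m1) */
            vfloat32m1_t v_res = vfredusum_vs_f32m4_f32m1(v_z0, vx, v_z0, gvl);
            sum += vfmv_f_s_f32m1_f32(v_res);   /* was: sum += v_res[0]; */
        }
        return sum;
    }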