--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+
+
+/*
+** foo1:
+** lw\t[a-x0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+int32_t foo1 (void *base, size_t vl)
+{
+ /* Return vmv.x.s of the vint32m1_t stored at BASE; VL is unused.  */
+ vint32m1_t *src = (vint32m1_t *) base;
+ return __riscv_vmv_x_s_i32m1_i32 (*src);
+}
+
+/*
+** foo2:
+** lw\t[a-x0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+int32_t foo2 (void *base, size_t vl)
+{
+ /* Read a vint32mf2_t from BASE and return vmv.x.s of it; VL is unused.  */
+ vint32mf2_t vec = *(vint32mf2_t *) base;
+ return __riscv_vmv_x_s_i32mf2_i32 (vec);
+}
+
+/*
+** foo3:
+** lw\t[a-x0-9]+,4\([a-x0-9]+\)
+** ret
+*/
+int32_t foo3 (int32_t *base, size_t vl)
+{
+ /* The vector is read starting one int32_t past BASE; VL is unused.  */
+ int32_t *second = &base[1];
+ return __riscv_vmv_x_s_i32m1_i32 (*(vint32m1_t *) second);
+}
+
+/*
+** foo4:
+** vl1re32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetvli\tzero,[a-x0-9]+,e32,m1,t[au],m[au]
+** vadd.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** vsetvli\tzero,[a-x0-9]+,e32,m2,t[au],m[au]
+** vmv.x.s\t[a-x0-9]+,\s*v[0-9]+
+** vmv.v.x\tv[0-9]+,\s*[a-x0-9]+
+** vmv.x.s\t[a-x0-9]+,\s*v[0-9]+
+** ret
+*/
+int32_t foo4 (void *base, size_t vl)
+{
+ /* Add the m1 input to itself, take vmv.x.s of the sum, broadcast that
+    value into an m2 vector and take vmv.x.s of the broadcast.  */
+ vint32m1_t loaded = *(vint32m1_t *) base;
+ vint32m1_t doubled = __riscv_vadd_vv_i32m1 (loaded, loaded, vl);
+ int32_t first = __riscv_vmv_x_s_i32m1_i32 (doubled);
+ return __riscv_vmv_x_s_i32m2_i32 (__riscv_vmv_v_x_i32m2 (first, vl));
+}
+
+/*
+** foo5:
+** flw\t[a-x0-9]+,4\([a-x0-9]+\)
+** ret
+*/
+float foo5 (int32_t *base, size_t vl)
+{
+ /* Return the bits of the int32_t produced by vmv.x.s, reinterpreted as a
+    float.  The reinterpretation goes through a union: reading an int32_t
+    object through a float lvalue (*(float*)&scalar) violates the
+    strict-aliasing rule, whereas GCC documents union punning as supported.
+    The vector is read one int32_t past BASE; VL is unused.  */
+ union { int32_t i; float f; } bits;
+ vint32m1_t v = *(vint32m1_t*)(base+1);
+ bits.i = __riscv_vmv_x_s_i32m1_i32 (v);
+ return bits.f;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <riscv_vector.h>
+
+/*
+** foo:
+** ld\t[a-x0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+int64_t foo (void *base, size_t vl)
+{
+ /* Return vmv.x.s of the vint64m1_t stored at BASE; VL is unused.  */
+ vint64m1_t *src = (vint64m1_t *) base;
+ return __riscv_vmv_x_s_i64m1_i64 (*src);
+}
+
+/*
+** foo2:
+** ld\t[a-x0-9]+,8\([a-x0-9]+\)
+** ret
+*/
+int64_t foo2 (int64_t *base, size_t vl)
+{
+ /* The vector is read starting at the second int64_t of BASE; VL is
+    unused.  */
+ vint64m1_t vec = *(vint64m1_t *) &base[1];
+ return __riscv_vmv_x_s_i64m1_i64 (vec);
+}
+
+/*
+** foo3:
+** vl1re64\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetvli\tzero,[a-x0-9]+,e64,m1,t[au],m[au]
+** vadd.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** vsetvli\tzero,[a-x0-9]+,e64,m2,t[au],m[au]
+** vmv.x.s\t[a-x0-9]+,\s*v[0-9]+
+** vmv.v.x\tv[0-9]+,[a-x0-9]+
+** vmv.x.s\t[a-x0-9]+,\s*v[0-9]+
+** ret
+*/
+int64_t foo3 (void *base, size_t vl)
+{
+ /* Add the m1 input to itself, take vmv.x.s of the sum, broadcast that
+    value into an m2 vector and take vmv.x.s of the broadcast.  */
+ vint64m1_t loaded = *(vint64m1_t *) base;
+ vint64m1_t doubled = __riscv_vadd_vv_i64m1 (loaded, loaded, vl);
+ int64_t first = __riscv_vmv_x_s_i64m1_i64 (doubled);
+ return __riscv_vmv_x_s_i64m2_i64 (__riscv_vmv_v_x_i64m2 (first, vl));
+}
+
+/*
+** foo4:
+** fld\t[a-x0-9]+,8\([a-x0-9]+\)
+** ret
+*/
+double foo4 (int64_t *base, size_t vl)
+{
+ /* Return the bits of the int64_t produced by vmv.x.s, reinterpreted as a
+    double.  The reinterpretation goes through a union: reading an int64_t
+    object through a double lvalue (*(double*)&scalar) violates the
+    strict-aliasing rule, whereas GCC documents union punning as supported.
+    The vector is read one int64_t (8 bytes) past BASE; VL is unused.  */
+ union { int64_t i; double d; } bits;
+ vint64m1_t v = *(vint64m1_t*)(base+1);
+ bits.i = __riscv_vmv_x_s_i64m1_i64 (v);
+ return bits.d;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+/*
+** foo:
+** lw\t[a-x0-9]+,4\([a-x0-9]+\)
+** lw\t[a-x0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+int64_t foo (void *base, size_t vl)
+{
+ /* Return vmv.x.s of the vint64m1_t stored at BASE; VL is unused.  */
+ vint64m1_t *src = (vint64m1_t *) base;
+ return __riscv_vmv_x_s_i64m1_i64 (*src);
+}
+
+/*
+** foo2:
+** lw\t[a-x0-9]+,12\([a-x0-9]+\)
+** lw\t[a-x0-9]+,8\([a-x0-9]+\)
+** ret
+*/
+int64_t foo2 (int64_t *base, size_t vl)
+{
+ /* The vector is read starting at the second int64_t of BASE; VL is
+    unused.  */
+ vint64m1_t vec = *(vint64m1_t *) &base[1];
+ return __riscv_vmv_x_s_i64m1_i64 (vec);
+}
+
+/*
+** foo3:
+** ...
+** vsrl.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmv.x.s\t[a-x0-9]+,\s*v[0-9]+
+** vmv.x.s\t[a-x0-9]+,\s*v[0-9]+
+** ret
+*/
+int64_t foo3 (void *base, size_t vl)
+{
+ /* Add the m1 input to itself with AVL VL and return vmv.x.s of the sum.  */
+ vint64m1_t loaded = *(vint64m1_t *) base;
+ return __riscv_vmv_x_s_i64m1_i64 (__riscv_vadd_vv_i64m1 (loaded, loaded, vl));
+}
+
+/*
+** foo4:
+** fld\t[a-x0-9]+,8\([a-x0-9]+\)
+** ret
+*/
+double foo4 (int64_t *base, size_t vl)
+{
+ /* Return the bits of the int64_t produced by vmv.x.s, reinterpreted as a
+    double.  The reinterpretation goes through a union: reading an int64_t
+    object through a double lvalue (*(double*)&scalar) violates the
+    strict-aliasing rule, whereas GCC documents union punning as supported.
+    The vector is read one int64_t (8 bytes) past BASE; VL is unused.  */
+ union { int64_t i; double d; } bits;
+ vint64m1_t v = *(vint64m1_t*)(base+1);
+ bits.i = __riscv_vmv_x_s_i64m1_i64 (v);
+ return bits.d;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+
+
+/*
+** foo1:
+** flw\t[a-x0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+float foo1 (void *base, size_t vl)
+{
+ /* Return vfmv.f.s of the vfloat32m1_t stored at BASE; VL is unused.  */
+ vfloat32m1_t *src = (vfloat32m1_t *) base;
+ return __riscv_vfmv_f_s_f32m1_f32 (*src);
+}
+
+/*
+** foo2:
+** flw\t[a-x0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+float foo2 (void *base, size_t vl)
+{
+ /* Read a vfloat32mf2_t from BASE and return vfmv.f.s of it; VL is
+    unused.  */
+ vfloat32mf2_t vec = *(vfloat32mf2_t *) base;
+ return __riscv_vfmv_f_s_f32mf2_f32 (vec);
+}
+
+/*
+** foo3:
+** flw\t[a-x0-9]+,4\([a-x0-9]+\)
+** ret
+*/
+float foo3 (float *base, size_t vl)
+{
+ /* The vector is read starting one float past BASE; VL is unused.  */
+ float *second = &base[1];
+ return __riscv_vfmv_f_s_f32m1_f32 (*(vfloat32m1_t *) second);
+}
+
+/*
+** foo4:
+** lw\t[a-x0-9]+,4\([a-x0-9]+\)
+** ret
+*/
+int32_t foo4 (float *base, size_t vl)
+{
+ /* Return the bits of the float produced by vfmv.f.s, reinterpreted as an
+    int32_t.  The reinterpretation goes through a union: reading a float
+    object through an int32_t lvalue (*(int32_t*)&scalar) violates the
+    strict-aliasing rule, whereas GCC documents union punning as supported.
+    The vector is read one float past BASE; VL is unused.  */
+ union { float f; int32_t i; } bits;
+ vfloat32m1_t v = *(vfloat32m1_t*)(base+1);
+ bits.f = __riscv_vfmv_f_s_f32m1_f32 (v);
+ return bits.i;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+
+/*
+** foo:
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** vsetvli\tzero,a2,e64,m2,t[au],m[au]
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void foo (void *base, void *out, size_t vl)
+{
+ /* The int64_t at BASE + 100 is the scalar operand of vmv.s.x (m2, AVL VL);
+    the resulting vector is stored to OUT.  */
+ int64_t *src = (int64_t *) (base + 100);
+ vint64m2_t *dst = (vint64m2_t *) out;
+ *dst = __riscv_vmv_s_x_i64m2 (*src, vl);
+}
+
+/*
+** foo2:
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** vsetvli\tzero,a2,e64,m2,t[au],m[au]
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void foo2 (void *base, void *out, size_t vl)
+{
+ /* The double at BASE + 100 is the scalar operand of vfmv.s.f (m2, AVL VL);
+    the resulting vector is stored to OUT.  */
+ double *src = (double *) (base + 100);
+ vfloat64m2_t *dst = (vfloat64m2_t *) out;
+ *dst = __riscv_vfmv_s_f_f64m2 (*src, vl);
+}
+
+/*
+** foo3:
+** ...
+** vmv.s.x\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo3 (void *base, void *out, size_t vl)
+{
+ /* _tu form: the scalar comes from BASE + 100 and the merge operand from
+    BASE + 200; the result is stored to OUT.  */
+ int64_t elem = *(int64_t *) (base + 100);
+ vint64m2_t passthru = *(vint64m2_t *) (base + 200);
+ *(vint64m2_t *) out = __riscv_vmv_s_x_i64m2_tu (passthru, elem, vl);
+}
+
+/*
+** foo4:
+** ...
+** vfmv.s.f\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo4 (void *base, void *out, size_t vl)
+{
+ /* _tu form: the scalar comes from BASE + 100 and the merge operand from
+    BASE + 200; the result is stored to OUT.  */
+ double elem = *(double *) (base + 100);
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_s_f_f64m2_tu (passthru, elem, vl);
+}
+
+/*
+** foo5:
+** ...
+** vmv.s.x\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo5 (void *base, void *out, size_t vl, int64_t x)
+{
+ /* The scalar operand is the argument X; BASE is unused.  */
+ vint64m2_t *dst = (vint64m2_t *) out;
+ *dst = __riscv_vmv_s_x_i64m2 (x, vl);
+}
+
+/*
+** foo6:
+** ...
+** vfmv.s.f\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo6 (void *base, void *out, size_t vl, double x)
+{
+ /* The scalar operand is the argument X; BASE is unused.  */
+ vfloat64m2_t *dst = (vfloat64m2_t *) out;
+ *dst = __riscv_vfmv_s_f_f64m2 (x, vl);
+}
+
+/*
+** foo7:
+** ...
+** vmv.s.x\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo7 (void *base, void *out, size_t vl, int64_t x)
+{
+ /* _tu form with the argument X as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vint64m2_t passthru = *(vint64m2_t *) (base + 200);
+ *(vint64m2_t *) out = __riscv_vmv_s_x_i64m2_tu (passthru, x, vl);
+}
+
+/*
+** foo8:
+** ...
+** vfmv.s.f\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo8 (void *base, void *out, size_t vl, double x)
+{
+ /* _tu form with the argument X as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_s_f_f64m2_tu (passthru, x, vl);
+}
+
+/*
+** foo9:
+** ...
+** vmv.v.i\tv[0-9]+,\s*-15
+** ...
+** ret
+*/
+void foo9 (void *base, void *out, size_t vl)
+{
+ /* The scalar operand is the constant -15, so nothing is read through BASE
+    and no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vint64m2_t v = __riscv_vmv_s_x_i64m2 (-15, vl);
+ *(vint64m2_t*)out = v;
+}
+
+/*
+** foo10:
+** ...
+** vmv.s.x\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo10 (void *base, void *out, size_t vl)
+{
+ /* _tu form with the constant -15 as scalar and the vector at BASE + 200 as
+    merge operand.  Only the merge operand is read through BASE; no local is
+    kept for a loaded scalar.  The result is stored to OUT.  */
+ vint64m2_t merge = *(vint64m2_t*) (base + 200);
+ vint64m2_t v = __riscv_vmv_s_x_i64m2_tu (merge, -15, vl);
+ *(vint64m2_t*)out = v;
+}
+
+/*
+** foo11:
+** ...
+** vmv.v.i\tv[0-9]+,\s*0
+** ...
+** ret
+*/
+void foo11 (void *base, void *out, size_t vl)
+{
+ /* The scalar operand is the constant 0, so nothing is read through BASE
+    and no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vfloat64m2_t v = __riscv_vfmv_s_f_f64m2 (0, vl);
+ *(vfloat64m2_t*)out = v;
+}
+
+/*
+** foo12:
+** ...
+** vfmv.s.f\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo12 (void *base, void *out, size_t vl)
+{
+ /* _tu form with the constant 0 as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_s_f_f64m2_tu (passthru, 0, vl);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+
+/*
+** foo:
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** vsetvli\tzero,a2,e64,m2,t[au],m[au]
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void foo (void *base, void *out, size_t vl)
+{
+ /* The int64_t at BASE + 100 is the scalar operand of vmv.s.x (m2, AVL VL);
+    the resulting vector is stored to OUT.  */
+ int64_t *src = (int64_t *) (base + 100);
+ vint64m2_t *dst = (vint64m2_t *) out;
+ *dst = __riscv_vmv_s_x_i64m2 (*src, vl);
+}
+
+/*
+** foo2:
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** vsetvli\tzero,a2,e64,m2,t[au],m[au]
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void foo2 (void *base, void *out, size_t vl)
+{
+ /* The double at BASE + 100 is the scalar operand of vfmv.s.f (m2, AVL VL);
+    the resulting vector is stored to OUT.  */
+ double *src = (double *) (base + 100);
+ vfloat64m2_t *dst = (vfloat64m2_t *) out;
+ *dst = __riscv_vfmv_s_f_f64m2 (*src, vl);
+}
+
+/*
+** foo3:
+** ...
+** andi\t[a-x0-9]+,\s*[a-x0-9]+,\s*1
+** ...
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** ...
+** ret
+*/
+void foo3 (void *base, void *out, size_t vl)
+{
+ /* _tu form: the scalar comes from BASE + 100 and the merge operand from
+    BASE + 200; the result is stored to OUT.  */
+ int64_t elem = *(int64_t *) (base + 100);
+ vint64m2_t passthru = *(vint64m2_t *) (base + 200);
+ *(vint64m2_t *) out = __riscv_vmv_s_x_i64m2_tu (passthru, elem, vl);
+}
+
+/*
+** foo4:
+** ...
+** andi\t[a-x0-9]+,\s*[a-x0-9]+,\s*1
+** ...
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** ...
+** ret
+*/
+void foo4 (void *base, void *out, size_t vl)
+{
+ /* _tu form: the scalar comes from BASE + 100 and the merge operand from
+    BASE + 200; the result is stored to OUT.  */
+ double elem = *(double *) (base + 100);
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_s_f_f64m2_tu (passthru, elem, vl);
+}
+
+/*
+** foo5:
+** ...
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** ...
+*/
+void foo5 (void *base, void *out, size_t vl, int64_t x)
+{
+ /* The scalar operand is the argument X; BASE is unused.  */
+ vint64m2_t *dst = (vint64m2_t *) out;
+ *dst = __riscv_vmv_s_x_i64m2 (x, vl);
+}
+
+/*
+** foo6:
+** ...
+** vfmv.s.f\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo6 (void *base, void *out, size_t vl, double x)
+{
+ /* The scalar operand is the argument X; BASE is unused.  */
+ vfloat64m2_t *dst = (vfloat64m2_t *) out;
+ *dst = __riscv_vfmv_s_f_f64m2 (x, vl);
+}
+
+/*
+** foo7:
+** ...
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** ...
+*/
+void foo7 (void *base, void *out, size_t vl, int64_t x)
+{
+ /* _tu form with the argument X as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vint64m2_t passthru = *(vint64m2_t *) (base + 200);
+ *(vint64m2_t *) out = __riscv_vmv_s_x_i64m2_tu (passthru, x, vl);
+}
+
+/*
+** foo8:
+** ...
+** vfmv.s.f\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo8 (void *base, void *out, size_t vl, double x)
+{
+ /* _tu form with the argument X as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_s_f_f64m2_tu (passthru, x, vl);
+}
+
+/*
+** foo9:
+** ...
+** vmv.v.i\tv[0-9]+,\s*-15
+** ...
+** ret
+*/
+void foo9 (void *base, void *out, size_t vl)
+{
+ /* The scalar operand is the constant -15, so nothing is read through BASE
+    and no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vint64m2_t v = __riscv_vmv_s_x_i64m2 (-15, vl);
+ *(vint64m2_t*)out = v;
+}
+
+/*
+** foo10:
+** ...
+** andi\t[a-x0-9]+,\s*[a-x0-9]+,\s*1
+** ...
+** vmv.v.i\tv[0-9]+,\s*-15
+** ...
+*/
+void foo10 (void *base, void *out, size_t vl)
+{
+ /* _tu form with the constant -15 as scalar and the vector at BASE + 200 as
+    merge operand.  Only the merge operand is read through BASE; no local is
+    kept for a loaded scalar.  The result is stored to OUT.  */
+ vint64m2_t merge = *(vint64m2_t*) (base + 200);
+ vint64m2_t v = __riscv_vmv_s_x_i64m2_tu (merge, -15, vl);
+ *(vint64m2_t*)out = v;
+}
+
+/*
+** foo11:
+** ...
+** vmv.v.i\tv[0-9]+,\s*0
+** ...
+** ret
+*/
+void foo11 (void *base, void *out, size_t vl)
+{
+ /* The scalar operand is the constant 0, so nothing is read through BASE
+    and no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vfloat64m2_t v = __riscv_vfmv_s_f_f64m2 (0, vl);
+ *(vfloat64m2_t*)out = v;
+}
+
+/*
+** foo12:
+** ...
+** andi\t[a-x0-9]+,\s*[a-x0-9]+,\s*1
+** ...
+** vmv.v.i\tv[0-9]+,\s*0
+** ...
+** ret
+*/
+void foo12 (void *base, void *out, size_t vl)
+{
+ /* _tu form with the constant 0 as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_s_f_f64m2_tu (passthru, 0, vl);
+}
+
+/*
+** foo13:
+** ...
+** vmv.s.x\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo13 (void *base, void *out, size_t vl)
+{
+ /* The scalar operand is the constant 0xAAAAA, so nothing is read through
+    BASE and no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vint64m2_t v = __riscv_vmv_s_x_i64m2 (0xAAAAA, vl);
+ *(vint64m2_t*)out = v;
+}
+
+/*
+** foo14:
+** ...
+** vmv.s.x\tv[0-9]+,\s*[a-x0-9]+
+** ...
+*/
+void foo14 (void *base, void *out, size_t vl)
+{
+ /* _tu form with the constant 0xAAAAA as scalar and the vector at
+    BASE + 200 as merge operand.  Only the merge operand is read through
+    BASE; no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vint64m2_t merge = *(vint64m2_t*) (base + 200);
+ vint64m2_t v = __riscv_vmv_s_x_i64m2_tu (merge, 0xAAAAA, vl);
+ *(vint64m2_t*)out = v;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+
+/*
+** foo:
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** vsetvli\tzero,a2,e64,m2,t[au],m[au]
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void foo (void *base, void *out, size_t vl)
+{
+ /* The int64_t at BASE + 100 is the scalar operand of vmv.v.x (m2, AVL VL);
+    the resulting vector is stored to OUT.  */
+ int64_t *src = (int64_t *) (base + 100);
+ vint64m2_t *dst = (vint64m2_t *) out;
+ *dst = __riscv_vmv_v_x_i64m2 (*src, vl);
+}
+
+/*
+** foo2:
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** vsetvli\tzero,a2,e64,m2,t[au],m[au]
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void foo2 (void *base, void *out, size_t vl)
+{
+ /* The double at BASE + 100 is the scalar operand of vfmv.v.f (m2, AVL VL);
+    the resulting vector is stored to OUT.  */
+ double *src = (double *) (base + 100);
+ vfloat64m2_t *dst = (vfloat64m2_t *) out;
+ *dst = __riscv_vfmv_v_f_f64m2 (*src, vl);
+}
+
+/*
+** foo3:
+** ...
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** ...
+** ret
+*/
+void foo3 (void *base, void *out, size_t vl)
+{
+ /* _tu form: the scalar comes from BASE + 100 and the merge operand from
+    BASE + 200; the result is stored to OUT.  */
+ int64_t elem = *(int64_t *) (base + 100);
+ vint64m2_t passthru = *(vint64m2_t *) (base + 200);
+ *(vint64m2_t *) out = __riscv_vmv_v_x_i64m2_tu (passthru, elem, vl);
+}
+
+/*
+** foo4:
+** ...
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** ...
+** ret
+*/
+void foo4 (void *base, void *out, size_t vl)
+{
+ /* _tu form: the scalar comes from BASE + 100 and the merge operand from
+    BASE + 200; the result is stored to OUT.  */
+ double elem = *(double *) (base + 100);
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_v_f_f64m2_tu (passthru, elem, vl);
+}
+
+/*
+** foo5:
+** ...
+** vmv.v.x\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo5 (void *base, void *out, size_t vl, int64_t x)
+{
+ /* The scalar operand is the argument X; BASE is unused.  */
+ vint64m2_t *dst = (vint64m2_t *) out;
+ *dst = __riscv_vmv_v_x_i64m2 (x, vl);
+}
+
+/*
+** foo6:
+** ...
+** vfmv.v.f\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo6 (void *base, void *out, size_t vl, double x)
+{
+ /* The scalar operand is the argument X; BASE is unused.  */
+ vfloat64m2_t *dst = (vfloat64m2_t *) out;
+ *dst = __riscv_vfmv_v_f_f64m2 (x, vl);
+}
+
+/*
+** foo7:
+** ...
+** vmv.v.x\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo7 (void *base, void *out, size_t vl, int64_t x)
+{
+ /* _tu form with the argument X as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vint64m2_t passthru = *(vint64m2_t *) (base + 200);
+ *(vint64m2_t *) out = __riscv_vmv_v_x_i64m2_tu (passthru, x, vl);
+}
+
+/*
+** foo8:
+** ...
+** vfmv.v.f\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo8 (void *base, void *out, size_t vl, double x)
+{
+ /* _tu form with the argument X as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_v_f_f64m2_tu (passthru, x, vl);
+}
+
+/*
+** foo9:
+** ...
+** vmv.v.i\tv[0-9]+,\s*-15
+** ...
+** ret
+*/
+void foo9 (void *base, void *out, size_t vl)
+{
+ /* The scalar operand is the constant -15, so nothing is read through BASE
+    and no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vint64m2_t v = __riscv_vmv_v_x_i64m2 (-15, vl);
+ *(vint64m2_t*)out = v;
+}
+
+/*
+** foo10:
+** ...
+** vmv.v.i\tv[0-9]+,\s*-15
+** ...
+** ret
+*/
+void foo10 (void *base, void *out, size_t vl)
+{
+ /* _tu form with the constant -15 as scalar and the vector at BASE + 200 as
+    merge operand.  Only the merge operand is read through BASE; no local is
+    kept for a loaded scalar.  The result is stored to OUT.  */
+ vint64m2_t merge = *(vint64m2_t*) (base + 200);
+ vint64m2_t v = __riscv_vmv_v_x_i64m2_tu (merge, -15, vl);
+ *(vint64m2_t*)out = v;
+}
+
+/*
+** foo11:
+** ...
+** vmv.v.i\tv[0-9]+,\s*0
+** ...
+** ret
+*/
+void foo11 (void *base, void *out, size_t vl)
+{
+ /* The scalar operand is the constant 0, so nothing is read through BASE
+    and no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vfloat64m2_t v = __riscv_vfmv_v_f_f64m2 (0, vl);
+ *(vfloat64m2_t*)out = v;
+}
+
+/*
+** foo12:
+** ...
+** vmv.v.i\tv[0-9]+,\s*0
+** ...
+** ret
+*/
+void foo12 (void *base, void *out, size_t vl)
+{
+ /* _tu form with the constant 0 as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_v_f_f64m2_tu (passthru, 0, vl);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+
+/*
+** foo:
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** vsetvli\tzero,a2,e64,m2,t[au],m[au]
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void foo (void *base, void *out, size_t vl)
+{
+ /* The int64_t at BASE + 100 is the scalar operand of vmv.v.x (m2, AVL VL);
+    the resulting vector is stored to OUT.  */
+ int64_t *src = (int64_t *) (base + 100);
+ vint64m2_t *dst = (vint64m2_t *) out;
+ *dst = __riscv_vmv_v_x_i64m2 (*src, vl);
+}
+
+/*
+** foo2:
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** vsetvli\tzero,a2,e64,m2,t[au],m[au]
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void foo2 (void *base, void *out, size_t vl)
+{
+ /* The double at BASE + 100 is the scalar operand of vfmv.v.f (m2, AVL VL);
+    the resulting vector is stored to OUT.  */
+ double *src = (double *) (base + 100);
+ vfloat64m2_t *dst = (vfloat64m2_t *) out;
+ *dst = __riscv_vfmv_v_f_f64m2 (*src, vl);
+}
+
+/*
+** foo3:
+** ...
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** ...
+** ret
+*/
+void foo3 (void *base, void *out, size_t vl)
+{
+ /* _tu form: the scalar comes from BASE + 100 and the merge operand from
+    BASE + 200; the result is stored to OUT.  */
+ int64_t elem = *(int64_t *) (base + 100);
+ vint64m2_t passthru = *(vint64m2_t *) (base + 200);
+ *(vint64m2_t *) out = __riscv_vmv_v_x_i64m2_tu (passthru, elem, vl);
+}
+
+/*
+** foo4:
+** ...
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** ...
+** ret
+*/
+void foo4 (void *base, void *out, size_t vl)
+{
+ /* _tu form: the scalar comes from BASE + 100 and the merge operand from
+    BASE + 200; the result is stored to OUT.  */
+ double elem = *(double *) (base + 100);
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_v_f_f64m2_tu (passthru, elem, vl);
+}
+
+/*
+** foo5:
+** ...
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** ...
+*/
+void foo5 (void *base, void *out, size_t vl, int64_t x)
+{
+ /* The scalar operand is the argument X; BASE is unused.  */
+ vint64m2_t *dst = (vint64m2_t *) out;
+ *dst = __riscv_vmv_v_x_i64m2 (x, vl);
+}
+
+/*
+** foo6:
+** ...
+** vfmv.v.f\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo6 (void *base, void *out, size_t vl, double x)
+{
+ /* The scalar operand is the argument X; BASE is unused.  */
+ vfloat64m2_t *dst = (vfloat64m2_t *) out;
+ *dst = __riscv_vfmv_v_f_f64m2 (x, vl);
+}
+
+/*
+** foo7:
+** ...
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** ...
+*/
+void foo7 (void *base, void *out, size_t vl, int64_t x)
+{
+ /* _tu form with the argument X as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vint64m2_t passthru = *(vint64m2_t *) (base + 200);
+ *(vint64m2_t *) out = __riscv_vmv_v_x_i64m2_tu (passthru, x, vl);
+}
+
+/*
+** foo8:
+** ...
+** vfmv.v.f\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo8 (void *base, void *out, size_t vl, double x)
+{
+ /* _tu form with the argument X as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_v_f_f64m2_tu (passthru, x, vl);
+}
+
+/*
+** foo9:
+** ...
+** vmv.v.i\tv[0-9]+,\s*-15
+** ...
+** ret
+*/
+void foo9 (void *base, void *out, size_t vl)
+{
+ /* The scalar operand is the constant -15, so nothing is read through BASE
+    and no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vint64m2_t v = __riscv_vmv_v_x_i64m2 (-15, vl);
+ *(vint64m2_t*)out = v;
+}
+
+/*
+** foo10:
+** ...
+** vmv.v.i\tv[0-9]+,\s*-15
+** ...
+*/
+void foo10 (void *base, void *out, size_t vl)
+{
+ /* _tu form with the constant -15 as scalar and the vector at BASE + 200 as
+    merge operand.  Only the merge operand is read through BASE; no local is
+    kept for a loaded scalar.  The result is stored to OUT.  */
+ vint64m2_t merge = *(vint64m2_t*) (base + 200);
+ vint64m2_t v = __riscv_vmv_v_x_i64m2_tu (merge, -15, vl);
+ *(vint64m2_t*)out = v;
+}
+
+/*
+** foo11:
+** ...
+** vmv.v.i\tv[0-9]+,\s*0
+** ...
+** ret
+*/
+void foo11 (void *base, void *out, size_t vl)
+{
+ /* The scalar operand is the constant 0, so nothing is read through BASE
+    and no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vfloat64m2_t v = __riscv_vfmv_v_f_f64m2 (0, vl);
+ *(vfloat64m2_t*)out = v;
+}
+
+/*
+** foo12:
+** ...
+** vmv.v.i\tv[0-9]+,\s*0
+** ...
+** ret
+*/
+void foo12 (void *base, void *out, size_t vl)
+{
+ /* _tu form with the constant 0 as scalar and the vector at BASE + 200 as
+    merge operand; the result is stored to OUT.  */
+ vfloat64m2_t passthru = *(vfloat64m2_t *) (base + 200);
+ *(vfloat64m2_t *) out = __riscv_vfmv_v_f_f64m2_tu (passthru, 0, vl);
+}
+
+/*
+** foo13:
+** ...
+** vmv.v.x\tv[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void foo13 (void *base, void *out, size_t vl)
+{
+ /* The scalar operand is the constant 0xAAAAA, so nothing is read through
+    BASE and no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vint64m2_t v = __riscv_vmv_v_x_i64m2 (0xAAAAA, vl);
+ *(vint64m2_t*)out = v;
+}
+
+/*
+** foo14:
+** ...
+** vmv.v.x\tv[0-9]+,\s*[a-x0-9]+
+** ...
+*/
+void foo14 (void *base, void *out, size_t vl)
+{
+ /* _tu form with the constant 0xAAAAA as scalar and the vector at
+    BASE + 200 as merge operand.  Only the merge operand is read through
+    BASE; no local is kept for a loaded scalar.  The result is stored to
+    OUT.  */
+ vint64m2_t merge = *(vint64m2_t*) (base + 200);
+ vint64m2_t v = __riscv_vmv_v_x_i64m2_tu (merge, 0xAAAAA, vl);
+ *(vint64m2_t*)out = v;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+
+/* vsetvl test: a vfmv.f.s result feeds vfmacc.vf inside a loop where every
+   vector operation uses the same AVL (101) and the f64m4 type.  The
+   scan-assembler directives after the function expect exactly one vsetvli
+   (e64,m4) and no vsetivli.  M, COND, VL and SCALAR are unused.  */
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, double scalar)
+{
+ size_t new_vl = 101;
+
+ /* Scalar taken from the vector loaded at IN, computed once before the loop.  */
+ vfloat64m4_t v2 = __riscv_vle64_v_f64m4 ((double *)in, new_vl);
+ double f = __riscv_vfmv_f_s_f64m4_f64 (v2);
+
+ /* I is size_t while N is int, so N is converted to unsigned for the
+    comparison.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat64m4_t v3 = __riscv_vle64_v_f64m4 ((double *)(in + i + 500), new_vl);
+ vfloat64m4_t v4 = __riscv_vle64_v_f64m4 ((double *)(in + i + 600), new_vl);
+ v4 = __riscv_vfmacc_vf_f64m4 (v4, f, v3, new_vl);
+
+ __riscv_vse64_v_f64m4 ((double *)(out + i + 200), v4, new_vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetivli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+
+/* vsetvl test: a vfmv.f.s result feeds vfmacc.vf inside a loop, all guarded
+   by M > N; every vector operation uses the same AVL (101) and the f64m4
+   type.  The scan-assembler directives after the function expect exactly one
+   vsetvli (e64,m4) and no vsetivli.  COND, VL and SCALAR are unused.  */
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, double scalar)
+{
+ size_t new_vl = 101;
+
+ if (m > n) {
+ /* Scalar taken from the vector loaded at IN, computed once before the loop.  */
+ vfloat64m4_t v2 = __riscv_vle64_v_f64m4 ((double *)in, new_vl);
+ double f = __riscv_vfmv_f_s_f64m4_f64 (v2);
+
+ /* I is size_t while N is int, so N is converted to unsigned for the
+    comparison.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat64m4_t v3 = __riscv_vle64_v_f64m4 ((double *)(in + i + 500), new_vl);
+ vfloat64m4_t v4 = __riscv_vle64_v_f64m4 ((double *)(in + i + 600), new_vl);
+ v4 = __riscv_vfmacc_vf_f64m4 (v4, f, v3, new_vl);
+
+ __riscv_vse64_v_f64m4 ((double *)(out + i + 200), v4, new_vl);
+ }
+ } else {
+ /* Deliberately empty else arm.  */
+ ;
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetivli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+/* vsetvl test: a _tu vfmv.s.f whose AVL is __riscv_vsetvlmax_e32m1 () sits
+   between f32mf2 loads/stores that all use the constant AVL 4.  The
+   scan-assembler directives after the function expect one vsetvli (e8,mf8)
+   and one vsetivli (AVL 4, e32,mf2,tu,mu) in total.  M, COND and VL are
+   unused.  */
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ /* Mask for the masked load/store below, read once before the loop.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+
+ /* I is size_t while N is int, so N is converted to unsigned for the
+    comparison.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), 4);
+ /* The only operation in the loop whose AVL is not the constant 4.  */
+ v = __riscv_vfmv_s_f_f32mf2_tu (v, scalar, __riscv_vsetvlmax_e32m1 ());
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, 4);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), 4);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, 4);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+/* vsetvl test: every operation in the loop, including the _tu vfmv.s.f, uses
+   __riscv_vsetvlmax_e32mf2 () as its AVL.  The scan-assembler directives
+   after the function expect a single vsetvli (rd, zero, e32,mf2,tu,mu).
+   M, COND and VL are unused.  */
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ /* Mask for the masked load/store below, read once before the loop.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+
+ /* I is size_t while N is int, so N is converted to unsigned for the
+    comparison.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), __riscv_vsetvlmax_e32mf2 ());
+ v = __riscv_vfmv_s_f_f32mf2_tu (v, scalar, __riscv_vsetvlmax_e32mf2 ());
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, __riscv_vsetvlmax_e32mf2 ());
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), __riscv_vsetvlmax_e32mf2 ());
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, __riscv_vsetvlmax_e32mf2 ());
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+/* vsetvl test: the loads and stores use __riscv_vsetvlmax_e32mf2 () as AVL
+   while the _tu vfmv.s.f between them uses the constant AVL 3.  The
+   scan-assembler directives after the function still expect a single
+   vsetvli (rd, zero, e32,mf2,tu,mu).  M, COND and VL are unused.  */
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ /* Mask for the masked load/store below, read once before the loop.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+
+ /* I is size_t while N is int, so N is converted to unsigned for the
+    comparison.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), __riscv_vsetvlmax_e32mf2 ());
+ /* The only operation in the loop with a different AVL (3).  */
+ v = __riscv_vfmv_s_f_f32mf2_tu (v, scalar, 3);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, __riscv_vsetvlmax_e32mf2 ());
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), __riscv_vsetvlmax_e32mf2 ());
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, __riscv_vsetvlmax_e32mf2 ());
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+/* vsetvl test: the loads and stores use the run-time AVL VL while the _tu
+   vfmv.s.f between them uses the constant AVL 3.  The scan-assembler
+   directives after the function expect two vsetvli with a register AVL
+   (e32,mf2), one vsetivli with AVL 3 (e32,mf2) and one vsetvli
+   (rd, zero, e8,mf8).  M and COND are unused.  */
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ /* Mask for the masked load/store below, read once before the loop.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+
+ /* I is size_t while N is int, so N is converted to unsigned for the
+    comparison.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), vl);
+ /* The only operation in the loop whose AVL is the constant 3 rather than VL.  */
+ v = __riscv_vfmv_s_f_f32mf2_tu (v, scalar, 3);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+/* vsetvl test with two loops: the first does f32mf2 loads/stores with the
+   constant AVL 4, the second repeatedly applies a _tu vfmv.s.f (f32m1,
+   AVL 3) to a vector loaded from IN + 300000.  Returns vfmv.f.s of that
+   vector.  The scan-assembler directives after the function expect one
+   vsetvli and two vsetivli in total.  M, COND and VL are unused.  */
+float f0 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ /* Mask for the masked load/store below, read once before the loops.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+
+ /* I is size_t while N is int, so N is converted to unsigned for the
+    comparison.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), 4);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, 4);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), 4);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, 4);
+ }
+
+ /* This V is a separate f32m1 variable; the V inside the loop above is
+    scoped to that loop body.  */
+ vfloat32m1_t v = *(vfloat32m1_t*)(in + 300000);
+ for (size_t i = 0; i < n; i++)
+ {
+ /* The scalar operand changes every iteration (SCALAR + I).  */
+ v = __riscv_vfmv_s_f_f32m1_tu (v, (scalar + i), 3);
+ }
+ return __riscv_vfmv_f_s_f32m1_f32 (v);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*tu,\s*mu} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+/* vsetvl test with two loops: the first does f32mf2 loads, a vfadd.vv and
+   stores, all with the constant AVL 4; the second repeatedly applies a _tu
+   vfmv.s.f (f32m1, AVL 3) to a vector loaded from IN + 300000.  Returns
+   vfmv.f.s of that vector.  The scan-assembler directives after the function
+   expect one vsetvli and two vsetivli in total.  M, COND and VL are
+   unused.  */
+float f0 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ /* Mask for the masked load/store below, read once before the loops.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+
+ /* I is size_t while N is int, so N is converted to unsigned for the
+    comparison.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), 4);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, 4);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), 4);
+ /* Unmasked add of the two loaded vectors before the masked store.  */
+ v2 = __riscv_vfadd_vv_f32mf2 (v,v2,4);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, 4);
+ }
+
+ /* This V is a separate f32m1 variable; the V inside the loop above is
+    scoped to that loop body.  */
+ vfloat32m1_t v = *(vfloat32m1_t*)(in + 300000);
+ for (size_t i = 0; i < n; i++)
+ {
+ /* The scalar operand changes every iteration (SCALAR + I).  */
+ v = __riscv_vfmv_s_f_f32m1_tu (v, (scalar + i), 3);
+ }
+ return __riscv_vfmv_f_s_f32m1_f32 (v);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*tu,\s*mu} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+/* Test body: copies a vbool4_t mask from IN to OUT by plain dereference,
+   then runs a loop of f64m1 scalar inserts with constant AVL 3 and
+   returns the value __riscv_vfmv_f_s_f64m1_f64 reads back from V.
+   M, COND and VL are accepted but never read.  The scan-assembler
+   directives after this function state the expected vsetvli/vsetivli
+   forms and counts.  */
+double f0 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, double scalar)
+{
+ vbool4_t mask = *(vbool4_t*) (in + 1000000);
+ *(vbool4_t*) (out + 1000000) = mask; /* Mask is only copied; it is not used as a predicate.  */
+
+ vfloat64m1_t v = *(vfloat64m1_t*)(in + 300000); /* Whole-vector dereference; no AVL is given here.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ v = __riscv_vfmv_s_f_f64m1_tu (v, (scalar + i), 3); /* _tu scalar insert into V with AVL 3.  */
+ }
+ return __riscv_vfmv_f_s_f64m1_f64 (v);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+/* Test body: copies a vbool32_t mask from IN to OUT by plain dereference,
+   then runs a loop of f64m1 scalar inserts with constant AVL 3 and
+   returns the value __riscv_vfmv_f_s_f64m1_f64 reads back from V.
+   M, COND and VL are accepted but never read.  The scan-assembler
+   directives after this function expect a single vsetvli and no
+   vsetivli.  */
+double f0 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, double scalar)
+{
+ vbool32_t mask = *(vbool32_t*) (in + 1000000);
+ *(vbool32_t*) (out + 1000000) = mask; /* Mask is only copied; it is not used as a predicate.  */
+
+ vfloat64m1_t v = *(vfloat64m1_t*)(in + 300000); /* Whole-vector dereference; no AVL is given here.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ v = __riscv_vfmv_s_f_f64m1_tu (v, (scalar + i), 3); /* _tu scalar insert into V with AVL 3.  */
+ }
+ return __riscv_vfmv_f_s_f64m1_f64 (v);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e64,\s*m2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetivli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+/* Test body: every intrinsic that takes an AVL receives the result of
+   __riscv_vsetvlmax_e32mf2 (), both for the f32mf2 memory operations in
+   the first loop and for the f32m1 scalar inserts in the second loop.
+   M, COND and VL are accepted but never read.  Returns the value
+   __riscv_vfmv_f_s_f32m1_f32 reads back from V.  The scan-assembler
+   directives after this function expect a single vsetvli and no
+   vsetivli.  */
+float f1 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ vbool64_t mask = *(vbool64_t*) (in + 1000000); /* Mask read by plain dereference, not by an intrinsic.  */
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), __riscv_vsetvlmax_e32mf2 ());
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, __riscv_vsetvlmax_e32mf2 ());
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), __riscv_vsetvlmax_e32mf2 ()); /* Masked _tumu load; V is passed as the merge operand.  */
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, __riscv_vsetvlmax_e32mf2 ());
+ }
+
+ vfloat32m1_t v = *(vfloat32m1_t*)(in + 300000); /* Whole-vector dereference; no AVL is given here.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ v = __riscv_vfmv_s_f_f32m1_tu (v, (scalar + i), __riscv_vsetvlmax_e32mf2 ()); /* m1 insert, but AVL still comes from the mf2 vlmax query.  */
+ }
+ return __riscv_vfmv_f_s_f32m1_f32 (v);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetivli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: the first loop performs f32mf2 memory operations whose AVL
+   is __riscv_vsetvlmax_e32mf2 (); afterwards V is loaded with
+   __riscv_vle32_v_f32m1 using constant AVL 3 and updated by a loop of
+   scalar inserts that also use AVL 3.  M, COND and VL are accepted but
+   never read.  Returns the value __riscv_vfmv_f_s_f32m1_f32 reads back
+   from V.  The scan-assembler directives after this function expect one
+   vsetvli and two vsetivli.  */
+float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ vbool64_t mask = *(vbool64_t*) (in + 1000000); /* Mask read by plain dereference, not by an intrinsic.  */
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), __riscv_vsetvlmax_e32mf2 ());
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, __riscv_vsetvlmax_e32mf2 ());
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), __riscv_vsetvlmax_e32mf2 ()); /* Masked _tumu load; V is passed as the merge operand.  */
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, __riscv_vsetvlmax_e32mf2 ());
+ }
+
+ vfloat32m1_t v = __riscv_vle32_v_f32m1 ((float*) (in + 300000), 3); /* Intrinsic load with explicit AVL 3, unlike the mask above.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ v = __riscv_vfmv_s_f_f32m1_tu (v, (scalar + i), 3); /* _tu scalar insert into V with AVL 3.  */
+ }
+ return __riscv_vfmv_f_s_f32m1_f32 (v);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: the first loop performs f32mf2 memory operations whose AVL
+   is __riscv_vsetvlmax_e32mf2 (); afterwards V is read by plain
+   dereference and updated by a loop of f32m1 scalar inserts with
+   constant AVL 3.  M, COND and VL are accepted but never read.  Returns
+   the value __riscv_vfmv_f_s_f32m1_f32 reads back from V.  The
+   scan-assembler directives after this function expect a single vsetvli
+   and no vsetivli.  */
+float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ vbool64_t mask = *(vbool64_t*) (in + 1000000); /* Mask read by plain dereference, not by an intrinsic.  */
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), __riscv_vsetvlmax_e32mf2 ());
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, __riscv_vsetvlmax_e32mf2 ());
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), __riscv_vsetvlmax_e32mf2 ()); /* Masked _tumu load; V is passed as the merge operand.  */
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, __riscv_vsetvlmax_e32mf2 ());
+ }
+
+ vfloat32m1_t v = *(vfloat32m1_t*)(in + 300000); /* Whole-vector dereference; no AVL is given here.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ v = __riscv_vfmv_s_f_f32m1_tu (v, (scalar + i), 3); /* _tu scalar insert into V with AVL 3.  */
+ }
+ return __riscv_vfmv_f_s_f32m1_f32 (v);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetivli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: the first loop performs f32mf2 memory operations whose AVL
+   is __riscv_vsetvlmax_e32mf2 (); afterwards V is read by plain
+   dereference, updated by a loop of f32m1 scalar inserts with constant
+   AVL 3, and finally added to itself with __riscv_vfadd_vv_f32m1, also
+   with AVL 3.  M, COND and VL are accepted but never read.  Returns the
+   value __riscv_vfmv_f_s_f32m1_f32 reads back from V.  The
+   scan-assembler directives after this function expect one vsetvli and
+   two vsetivli.  */
+float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ vbool64_t mask = *(vbool64_t*) (in + 1000000); /* Mask read by plain dereference, not by an intrinsic.  */
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), __riscv_vsetvlmax_e32mf2 ());
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, __riscv_vsetvlmax_e32mf2 ());
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), __riscv_vsetvlmax_e32mf2 ()); /* Masked _tumu load; V is passed as the merge operand.  */
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, __riscv_vsetvlmax_e32mf2 ());
+ }
+
+ vfloat32m1_t v = *(vfloat32m1_t*)(in + 300000); /* Whole-vector dereference; no AVL is given here.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ v = __riscv_vfmv_s_f_f32m1_tu (v, (scalar + i), 3); /* _tu scalar insert into V with AVL 3.  */
+ }
+ v = __riscv_vfadd_vv_f32m1 (v,v, 3); /* Full vector operation on V after the inserts, AVL 3.  */
+ return __riscv_vfmv_f_s_f32m1_f32 (v);
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: inside the first loop an f32mf2 scalar insert with constant
+   AVL 3 sits between a load and a store whose AVL is
+   __riscv_vsetvlmax_e32mf2 (); the masked load/store that follow also
+   use the vlmax AVL.  A second loop performs f32m1 scalar inserts with
+   AVL 3.  M, COND and VL are accepted but never read.  Returns the value
+   __riscv_vfmv_f_s_f32m1_f32 reads back from V.  The scan-assembler
+   directives after this function expect a single vsetvli and no
+   vsetivli.  */
+float f2 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ vbool64_t mask = *(vbool64_t*) (in + 1000000); /* Mask read by plain dereference, not by an intrinsic.  */
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), __riscv_vsetvlmax_e32mf2 ());
+ v = __riscv_vfmv_s_f_f32mf2_tu (v, scalar, 3); /* mf2 scalar insert with AVL 3 between vlmax operations.  */
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, __riscv_vsetvlmax_e32mf2 ());
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), __riscv_vsetvlmax_e32mf2 ()); /* Masked _tumu load; V is passed as the merge operand.  */
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, __riscv_vsetvlmax_e32mf2 ());
+ }
+
+ vfloat32m1_t v = *(vfloat32m1_t*)(in + 300000); /* Whole-vector dereference; no AVL is given here.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ v = __riscv_vfmv_s_f_f32m1_tu (v, (scalar + i), 3); /* _tu scalar insert into V with AVL 3.  */
+ }
+ return __riscv_vfmv_f_s_f32m1_f32 (v);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetivli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: the first loop's loads and stores take the run-time VL
+   parameter as AVL, while the f32mf2 scalar insert between them uses
+   constant AVL 3.  A second loop performs f32m1 scalar inserts with
+   AVL 3.  M and COND are accepted but never read.  Returns the value
+   __riscv_vfmv_f_s_f32m1_f32 reads back from V.  The scan-assembler
+   directives after this function expect three vsetvli and two vsetivli
+   in total.  */
+float f3 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ vbool64_t mask = *(vbool64_t*) (in + 1000000); /* Mask read by plain dereference, not by an intrinsic.  */
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), vl);
+ v = __riscv_vfmv_s_f_f32mf2_tu (v, scalar, 3); /* Constant AVL 3 between operations that use the run-time VL.  */
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), vl); /* Masked _tumu load; V is passed as the merge operand.  */
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, vl);
+ }
+
+ vfloat32m1_t v = *(vfloat32m1_t*)(in + 300000); /* Whole-vector dereference; no AVL is given here.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ v = __riscv_vfmv_s_f_f32m1_tu (v, (scalar + i), 3); /* _tu scalar insert into V with AVL 3.  */
+ }
+ return __riscv_vfmv_f_s_f32m1_f32 (v);
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: a loop of f64m1 loads and stores (plain and masked) with
+   constant AVL 3, followed by one f32mf2 scalar insert with AVL 3 whose
+   result is stored to OUT by plain dereference.  M, COND and VL are
+   accepted but never read.
+   NOTE(review): the function is declared to return float but contains
+   no return statement; the test is compile-only, so the result is never
+   consumed.  The scan-assembler directives after this function expect
+   two vsetvli and one vsetivli.  */
+float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ vbool64_t mask = *(vbool64_t*) (in + 1000000); /* Mask read by plain dereference, not by an intrinsic.  */
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat64m1_t v = __riscv_vle64_v_f64m1 ((double *)(in + i + 200), 3);
+ __riscv_vse64_v_f64m1 ((double *)(out + i + 200), v, 3);
+
+ vfloat64m1_t v2 = __riscv_vle64_v_f64m1_tumu (mask, v, (double *)(in + i + 300), 3); /* Masked _tumu load; V is passed as the merge operand.  */
+ __riscv_vse64_v_f64m1_m (mask, (double *)(out + i + 300), v2, 3);
+ }
+
+ vfloat32mf2_t v = __riscv_vfmv_s_f_f32mf2 (scalar, 3); /* Non-_tu scalar insert: no merge operand is passed.  */
+ *(vfloat32mf2_t*)(out + 100000) = v; /* Whole-vector store by dereference.  */
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: a loop that loads f64m1 data, adds it to itself and stores
+   it, all with constant AVL 3, followed by one f32m1 scalar insert with
+   AVL 3 whose result is stored to OUT by plain dereference.  M, COND and
+   VL are accepted but never read.
+   NOTE(review): the function is declared to return float but contains
+   no return statement; the test is compile-only, so the result is never
+   consumed.  The scan-assembler directives after this function expect
+   one vsetivli and no vsetvli.  */
+float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat64m1_t v = __riscv_vle64_v_f64m1 ((double *)(in + i + 200), 3);
+ v = __riscv_vfadd_vv_f64m1 (v,v,3); /* Arithmetic on 64-bit elements between the load and the store.  */
+ __riscv_vse64_v_f64m1 ((double *)(out + i + 200), v, 3);
+ }
+
+ vfloat32m1_t v = __riscv_vfmv_s_f_f32m1 (scalar, 3); /* Non-_tu scalar insert: no merge operand is passed.  */
+ *(vfloat32m1_t*)(out + 100000) = v; /* Whole-vector store by dereference.  */
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetvli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: a loop of f64m2 load/store pairs with constant AVL 3,
+   followed by one f32m1 scalar insert with AVL 3 whose result is stored
+   to OUT by plain dereference.  M, COND and VL are accepted but never
+   read.
+   NOTE(review): the function is declared to return float but contains
+   no return statement; the test is compile-only, so the result is never
+   consumed.  The scan-assembler directives after this function expect
+   one vsetivli (in its e32, m1 form) and no vsetvli.  */
+float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat64m2_t v = __riscv_vle64_v_f64m2 ((double *)(in + i + 200), 3);
+ __riscv_vse64_v_f64m2 ((double *)(out + i + 200), v, 3);
+ }
+
+ vfloat32m1_t v = __riscv_vfmv_s_f_f32m1 (scalar, 3); /* Non-_tu scalar insert: no merge operand is passed.  */
+ *(vfloat32m1_t*)(out + 100000) = v; /* Whole-vector store by dereference.  */
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetvli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: a loop of f64m4 load/store pairs with constant AVL 3,
+   followed by one f32m1 scalar insert with AVL 3 whose result is stored
+   to OUT by plain dereference.  M, COND and VL are accepted but never
+   read.
+   NOTE(review): the function is declared to return float but contains
+   no return statement; the test is compile-only, so the result is never
+   consumed.  The scan-assembler directives after this function expect
+   one vsetivli (in its e32, m2 form) and no vsetvli.  */
+float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat64m4_t v = __riscv_vle64_v_f64m4 ((double *)(in + i + 200), 3);
+ __riscv_vse64_v_f64m4 ((double *)(out + i + 200), v, 3);
+ }
+
+ vfloat32m1_t v = __riscv_vfmv_s_f_f32m1 (scalar, 3); /* Non-_tu scalar insert: no merge operand is passed.  */
+ *(vfloat32m1_t*)(out + 100000) = v; /* Whole-vector store by dereference.  */
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetvli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: a loop of f64m4 load/store pairs with constant AVL 3,
+   followed by one f32m1 scalar insert with AVL 3.  The inserted vector
+   is stored to OUT by plain dereference and is also read back through
+   __riscv_vfmv_f_s_f32m1_f32, whose result is returned.  M, COND and VL
+   are accepted but never read.  The scan-assembler directives after this
+   function expect one vsetivli (in its e32, m2 form) and no vsetvli.  */
+float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, float scalar)
+{
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat64m4_t v = __riscv_vle64_v_f64m4 ((double *)(in + i + 200), 3);
+ __riscv_vse64_v_f64m4 ((double *)(out + i + 200), v, 3);
+ }
+
+ vfloat32m1_t v = __riscv_vfmv_s_f_f32m1 (scalar, 3); /* Non-_tu scalar insert: no merge operand is passed.  */
+ *(vfloat32m1_t*)(out + 100000) = v; /* Whole-vector store by dereference.  */
+ return __riscv_vfmv_f_s_f32m1_f32 (v);
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetvli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: each iteration loads an f64m4 vector with the run-time VL,
+   extracts a scalar from it with __riscv_vfmv_f_s_f64m4_f64, and feeds
+   that scalar into a vfmacc over two further loads before storing the
+   result; all vector operations that take an AVL use VL.  M, COND and
+   SCALAR are accepted but never read.  The scan-assembler directives
+   after this function expect a single vsetvli and no vsetivli.  */
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, double scalar)
+{
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat64m4_t v2 = __riscv_vle64_v_f64m4 ((double *)(in + i + 200), vl);
+ double f = __riscv_vfmv_f_s_f64m4_f64 (v2); /* Scalar extract; this intrinsic takes no AVL argument.  */
+ vfloat64m4_t v3 = __riscv_vle64_v_f64m4 ((double *)(in + i + 500), vl);
+ vfloat64m4_t v4 = __riscv_vle64_v_f64m4 ((double *)(in + i + 600), vl);
+ v4 = __riscv_vfmacc_vf_f64m4 (v4, f, v3, vl); /* V4 is the accumulator, F the scalar multiplicand.  */
+
+ __riscv_vse64_v_f64m4 ((double *)(out + i + 200), v4, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetivli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: V2 is read once by plain dereference before the loop; each
+   iteration extracts a scalar from it, executes an empty asm with a
+   memory clobber, and then runs two loads, a vfmacc and a store with the
+   constant AVL 101.  M, COND, VL and SCALAR are accepted but never read.
+   The scan-assembler directives after this function expect one vsetivli
+   and one vsetvli.  */
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, double scalar)
+{
+ vfloat64m4_t v2 = *(vfloat64m4_t*)in; /* Whole-vector dereference; no AVL is given here.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ double f = __riscv_vfmv_f_s_f64m4_f64 (v2); /* Scalar extract; this intrinsic takes no AVL argument.  */
+ asm volatile ("":::"memory"); /* Compiler-level memory barrier between the extract and the loads.  */
+ size_t new_vl = 101; /* NOTE(review): presumably chosen to exceed the vsetivli immediate range -- confirm.  */
+ vfloat64m4_t v3 = __riscv_vle64_v_f64m4 ((double *)(in + i + 500), new_vl);
+ vfloat64m4_t v4 = __riscv_vle64_v_f64m4 ((double *)(in + i + 600), new_vl);
+ v4 = __riscv_vfmacc_vf_f64m4 (v4, f, v3, new_vl); /* V4 is the accumulator, F the scalar multiplicand.  */
+
+ __riscv_vse64_v_f64m4 ((double *)(out + i + 200), v4, new_vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*0,\s*e64,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+/* Test body: before the loop an f64m4 vector is loaded with the run-time
+   VL and a scalar is extracted from it; each iteration then runs two
+   loads, a vfmacc using that scalar and a store, all with AVL VL.
+   M, COND and SCALAR are accepted but never read.  The scan-assembler
+   directives after this function expect a single vsetvli and no
+   vsetivli.  */
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl, double scalar)
+{
+ vfloat64m4_t v2 = __riscv_vle64_v_f64m4 ((double *)in, vl);
+ double f = __riscv_vfmv_f_s_f64m4_f64 (v2); /* Scalar extract done once, outside the loop.  */
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat64m4_t v3 = __riscv_vle64_v_f64m4 ((double *)(in + i + 500), vl);
+ vfloat64m4_t v4 = __riscv_vle64_v_f64m4 ((double *)(in + i + 600), vl);
+ v4 = __riscv_vfmacc_vf_f64m4 (v4, f, v3, vl); /* V4 is the accumulator, F the scalar multiplicand.  */
+
+ __riscv_vse64_v_f64m4 ((double *)(out + i + 200), v4, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vsetivli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */