#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#undef CASE_VFMA_OPCODE_LMULS
#undef CASE_VFMA_OPCODE_COMMON
+// clang-format off
+#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
+ RISCV::PseudoV##OP##_##LMUL##_TIED
+
+#define CASE_WIDEOP_OPCODE_LMULS(OP) \
+ CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
+ case CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
+ case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
+ case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
+ case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
+ case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
+// clang-format on
+
+#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
+ case RISCV::PseudoV##OP##_##LMUL##_TIED: \
+ NewOpc = RISCV::PseudoV##OP##_##LMUL; \
+ break;
+
+#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
+
+MachineInstr *RISCVInstrInfo::convertToThreeAddress(
+ MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case CASE_WIDEOP_OPCODE_LMULS(FWADD_WV):
+ case CASE_WIDEOP_OPCODE_LMULS(FWSUB_WV):
+ case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
+ case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
+ case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
+ case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
+ // clang-format off
+ unsigned NewOpc;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
+ }
+  // clang-format on
+
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ MIB.copyImplicitOps(MI);
+
+ if (LV) {
+ unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I < NumOps; ++I) {
+ MachineOperand &Op = MI.getOperand(I);
+ if (Op.isReg() && Op.isKill())
+ LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
+ }
+ }
+
+ return MIB;
+ }
+ }
+
+ return nullptr;
+}
+
+#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
+#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
+#undef CASE_WIDEOP_OPCODE_LMULS
+#undef CASE_WIDEOP_OPCODE_COMMON
+
Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator II,
unsigned OpIdx1,
unsigned OpIdx2) const override;
+ MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
+ MachineInstr &MI,
+ LiveVariables *LV) const override;
+
Register getVLENFactoredAmount(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator II,
const DebugLoc &DL, int64_t Amount) const;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+class VPseudoTiedBinaryNoMask<VReg RetClass,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $rs2"], ",">.ret;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let ForceTailAgnostic = 1;
+ let isConvertibleToThreeAddress = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
bit Ordered>:
Pseudo<(outs),
LMULInfo MInfo,
string Constraint = ""> {
let VLMul = MInfo.value in {
+ def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask<RetClass, Op2Class,
+ Constraint>;
def "_" # MInfo.MX # "_MASK_TIED" : VPseudoTiedBinaryMask<RetClass, Op2Class,
Constraint>;
}
(op2_type op2_kind:$rs2),
(mask_type V0), GPR:$vl, sew)>;
+class VPatTiedBinaryNoMask<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ VReg result_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type result_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_TIED")
+ (result_type result_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ GPR:$vl, sew)>;
+
class VPatTiedBinaryMask<string intrinsic_name,
string inst,
ValueType result_type,
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
+ def : VPatTiedBinaryNoMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector,
+ Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
let AddedComplexity = 1 in
def : VPatTiedBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
Wti.Vector, Vti.Vector, Vti.Mask,
Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
- defm : VPatBinary<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
- Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Wti.RegClass, Vti.RegClass>;
+ def : VPatBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
}
}
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vfwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vfwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vfwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vfwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vfwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vfwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
ret <vscale x 8 x double> %a
}
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
+ <vscale x 1 x float> %1,
+ <vscale x 1 x half> %0,
+ i32 %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
+ <vscale x 2 x float> %1,
+ <vscale x 2 x half> %0,
+ i32 %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vfwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
+ <vscale x 4 x float> %1,
+ <vscale x 4 x half> %0,
+ i32 %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vfwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
+ <vscale x 8 x float> %1,
+ <vscale x 8 x half> %0,
+ i32 %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
+ <vscale x 1 x double> %1,
+ <vscale x 1 x float> %0,
+ i32 %2)
+
+ ret <vscale x 1 x double> %a
+}
+
+define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vfwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
+ <vscale x 2 x double> %1,
+ <vscale x 2 x float> %0,
+ i32 %2)
+
+ ret <vscale x 2 x double> %a
+}
+
+define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vfwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
+ <vscale x 4 x double> %1,
+ <vscale x 4 x float> %0,
+ i32 %2)
+
+ ret <vscale x 4 x double> %a
+}
+
+define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vfwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
+ <vscale x 8 x double> %1,
+ <vscale x 8 x float> %0,
+ i32 %2)
+
+ ret <vscale x 8 x double> %a
+}
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vfwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vfwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vfwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vfwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vfwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vfwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
ret <vscale x 8 x double> %a
}
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
+ <vscale x 1 x float> %1,
+ <vscale x 1 x half> %0,
+ i64 %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
+ <vscale x 2 x float> %1,
+ <vscale x 2 x half> %0,
+ i64 %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vfwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
+ <vscale x 4 x float> %1,
+ <vscale x 4 x half> %0,
+ i64 %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vfwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
+ <vscale x 8 x float> %1,
+ <vscale x 8 x half> %0,
+ i64 %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
+ <vscale x 1 x double> %1,
+ <vscale x 1 x float> %0,
+ i64 %2)
+
+ ret <vscale x 1 x double> %a
+}
+
+define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vfwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
+ <vscale x 2 x double> %1,
+ <vscale x 2 x float> %0,
+ i64 %2)
+
+ ret <vscale x 2 x double> %a
+}
+
+define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vfwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
+ <vscale x 4 x double> %1,
+ <vscale x 4 x float> %0,
+ i64 %2)
+
+ ret <vscale x 4 x double> %a
+}
+
+define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vfwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
+ <vscale x 8 x double> %1,
+ <vscale x 8 x float> %0,
+ i64 %2)
+
+ ret <vscale x 8 x double> %a
+}
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vfwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vfwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vfwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vfwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vfwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vfwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
ret <vscale x 8 x double> %a
}
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
+ <vscale x 1 x float> %1,
+ <vscale x 1 x half> %0,
+ i32 %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
+ <vscale x 2 x float> %1,
+ <vscale x 2 x half> %0,
+ i32 %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vfwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
+ <vscale x 4 x float> %1,
+ <vscale x 4 x half> %0,
+ i32 %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vfwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
+ <vscale x 8 x float> %1,
+ <vscale x 8 x half> %0,
+ i32 %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
+ <vscale x 1 x double> %1,
+ <vscale x 1 x float> %0,
+ i32 %2)
+
+ ret <vscale x 1 x double> %a
+}
+
+define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vfwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
+ <vscale x 2 x double> %1,
+ <vscale x 2 x float> %0,
+ i32 %2)
+
+ ret <vscale x 2 x double> %a
+}
+
+define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vfwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
+ <vscale x 4 x double> %1,
+ <vscale x 4 x float> %0,
+ i32 %2)
+
+ ret <vscale x 4 x double> %a
+}
+
+define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vfwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
+ <vscale x 8 x double> %1,
+ <vscale x 8 x float> %0,
+ i32 %2)
+
+ ret <vscale x 8 x double> %a
+}
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vfwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vfwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vfwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vfwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vfwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vfwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
ret <vscale x 8 x double> %a
}
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
+ <vscale x 1 x float> %1,
+ <vscale x 1 x half> %0,
+ i64 %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
+ <vscale x 2 x float> %1,
+ <vscale x 2 x half> %0,
+ i64 %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vfwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
+ <vscale x 4 x float> %1,
+ <vscale x 4 x half> %0,
+ i64 %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vfwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
+ <vscale x 8 x float> %1,
+ <vscale x 8 x half> %0,
+ i64 %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
+ <vscale x 1 x double> %1,
+ <vscale x 1 x float> %0,
+ i64 %2)
+
+ ret <vscale x 1 x double> %a
+}
+
+define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vfwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
+ <vscale x 2 x double> %1,
+ <vscale x 2 x float> %0,
+ i64 %2)
+
+ ret <vscale x 2 x double> %a
+}
+
+define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vfwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
+ <vscale x 4 x double> %1,
+ <vscale x 4 x float> %0,
+ i64 %2)
+
+ ret <vscale x 4 x double> %a
+}
+
+define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vfwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
+ <vscale x 8 x double> %1,
+ <vscale x 8 x float> %0,
+ i64 %2)
+
+ ret <vscale x 8 x double> %a
+}
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwadd.w.nxv16i32.nxv16i16(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32(
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i64> %a
+}
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwadd.w.nxv16i32.nxv16i16(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32(
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32(
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i64> %a
+}
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwaddu.w.nxv16i32.nxv16i16(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32(
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwaddu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwaddu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i64> %a
+}
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwaddu.w.nxv16i32.nxv16i16(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32(
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32(
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwaddu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwaddu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i64> %a
+}
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsub.w.nxv16i32.nxv16i16(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32(
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i64> %a
+}
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsub.w.nxv16i32.nxv16i16(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32(
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32(
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i64> %a
+}
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsubu.w.nxv16i32.nxv16i16(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32(
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwsubu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwsubu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i64> %a
+}
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsubu.w.nxv16i32.nxv16i16(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32(
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32(
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwsubu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwsubu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i64> %a
+}