From 875c76de2b6ad67b10c027ed74422642cf4d1aed Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Fri, 12 Feb 2021 09:09:22 -0800
Subject: [PATCH] [RISCV] Add support for matching .vx and .vi forms of binary
 instructions for fixed vectors.

Unlike scalable vectors, I'm only using a ComplexPattern for
the immediate itself. The vmv_v_x is matched explicitly. We ignore
the VL argument when matching a binary operator, but we do check
it when matching splat directly.

I left out tests for vXi64 as they fail on rv32 right now.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D96365
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp        |   39 +-
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h          |   10 +
 llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td    |    9 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td |   11 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td |   76 +-
 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll   | 1635 ++++++++++++++++++++
 6 files changed, 1760 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 62fe646..05620fc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1015,7 +1015,8 @@ bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
 
 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
   if (N.getOpcode() != ISD::SPLAT_VECTOR &&
-      N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64)
+      N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
+      N.getOpcode() != RISCVISD::VMV_V_X_VL)
     return false;
   SplatVal = N.getOperand(0);
   return true;
@@ -1023,7 +1024,8 @@ bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
 
 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
   if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
-       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64) ||
+       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
+       N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
       !isa<ConstantSDNode>(N.getOperand(0)))
     return false;
 
@@ -1053,7 +1055,8 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
 
 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
   if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
-       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64) ||
+       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
+       N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
       !isa<ConstantSDNode>(N.getOperand(0)))
     return false;
 
@@ -1068,6 +1071,36 @@ bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
   return true;
 }
 
+bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
+                                       SDValue &Imm) {
+  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
+    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
+
+    if (!isInt<5>(ImmVal))
+      return false;
+
+    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
+    return true;
+  }
+
+  return false;
+}
+
+bool RISCVDAGToDAGISel::selectRVVUimm5(SDValue N, unsigned Width,
+                                       SDValue &Imm) {
+  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
+    int64_t ImmVal = C->getSExtValue();
+
+    if (!isUInt<5>(ImmVal))
+      return false;
+
+    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
+    return true;
+  }
+
+  return false;
+}
+
 // Merge an ADDI into the offset of a load/store instruction where possible.
 // (load (addi base, off1), off2) -> (load base, off1+off2)
 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 0264de7..1e9dba3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -57,6 +57,16 @@ public:
   bool selectVSplatSimm5(SDValue N, SDValue &SplatVal);
   bool selectVSplatUimm5(SDValue N, SDValue &SplatVal);
 
+  bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm);
+  template <unsigned Width> bool selectRVVSimm5(SDValue N, SDValue &Imm) {
+    return selectRVVSimm5(N, Width, Imm);
+  }
+
+  bool selectRVVUimm5(SDValue N, unsigned Width, SDValue &Imm);
+  template <unsigned Width> bool selectRVVUimm5(SDValue N, SDValue &Imm) {
+    return selectRVVUimm5(N, Width, Imm);
+  }
+
   void selectVLSEG(SDNode *Node, unsigned IntNo, bool IsStrided);
   void selectVLSEGMask(SDNode *Node, unsigned IntNo, bool IsStrided);
   void selectVLSEGFF(SDNode *Node);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 6f299b5..e3e8392 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -4056,15 +4056,8 @@ foreach vti = AllVectors in {
                              (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
              $rs1, GPR:$vl, vti.SEW)>;
-}
 
-foreach vti = AllIntegerVectors in {
-  def : Pat<(vti.Vector (riscv_vmv_v_x_vl GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
-            (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
-             $rs2, GPR:$vl, vti.SEW)>;
-  def : Pat<(vti.Vector (riscv_vmv_v_x_vl simm5:$imm5, (XLenVT (VLOp GPR:$vl)))),
-            (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
-             simm5:$imm5, GPR:$vl, vti.SEW)>;
+  // vmv.v.x/vmv.v.i are handled in RISCVInstrInfoVVLPatterns.td
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index e99c8e3..2976cb5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -126,7 +126,6 @@ class VPatBinarySDNode_XI<SDNode vop,
                           string suffix,
                           ValueType result_type,
                           ValueType vop_type,
-                          ValueType xop_type,
                           ValueType mask_type,
                           int sew,
                           LMULInfo vlmul,
@@ -145,11 +144,11 @@ multiclass VPatBinarySDNode_VV_VX<SDNode vop, string instruction_name> {
    def : VPatBinarySDNode_VV<vop, instruction_name,
                              vti.Vector, vti.Vector, vti.Mask, vti.SEW,
                              vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>;
    def : VPatBinarySDNode_XI<vop, instruction_name, "VX",
-                             vti.Vector, vti.Vector, XLenVT, vti.Mask,
-                             vti.SEW, vti.LMul, vti.AVL, vti.RegClass,
+                             vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+                             vti.LMul, vti.AVL, vti.RegClass,
                              SplatPat, GPR>;
   }
 }
@@ -164,11 +163,11 @@ multiclass VPatBinarySDNode_VV_VX_VI<SDNode vop, string instruction_name,
                              vti.Vector, vti.Vector, vti.Mask, vti.SEW,
                              vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>;
    def : VPatBinarySDNode_XI<vop, instruction_name, "VX",
-                             vti.Vector, vti.Vector, XLenVT, vti.Mask,
-                             vti.SEW, vti.LMul, vti.AVL, vti.RegClass,
+                             vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+                             vti.LMul, vti.AVL, vti.RegClass,
                              SplatPat, GPR>;
    def : VPatBinarySDNode_XI<vop, instruction_name, "VI",
-                             vti.Vector, vti.Vector, XLenVT, vti.Mask,
-                             vti.SEW, vti.LMul, vti.AVL, vti.RegClass,
+                             vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+                             vti.LMul, vti.AVL, vti.RegClass,
                              !cast<ComplexPattern>(SplatPat#_#ImmType),
                              ImmType>;
@@ -423,11 +422,11 @@ defm "" : VPatBinarySDNode_VV_VX<sub, "PseudoVSUB">;
 // Handle VRSUB specially since it's the only integer binary op with reversed
 // pattern operands
 foreach vti = AllIntegerVectors in {
-  def : Pat<(sub (vti.Vector (SplatPat XLenVT:$rs2)),
+  def : Pat<(sub (vti.Vector (SplatPat GPR:$rs2)),
                  (vti.Vector vti.RegClass:$rs1)),
             (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
                  vti.RegClass:$rs1, GPR:$rs2, vti.AVL, vti.SEW)>;
-  def : Pat<(sub (vti.Vector (SplatPat_simm5 XLenVT:$rs2)),
+  def : Pat<(sub (vti.Vector (SplatPat_simm5 simm5:$rs2)),
                  (vti.Vector vti.RegClass:$rs1)),
             (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
                  vti.RegClass:$rs1, simm5:$rs2, vti.AVL, vti.SEW)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index f1d6952..9517ac9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -97,11 +97,22 @@ def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCisVec<0>,
 def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>;
 def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;
SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>; -def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>; +def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>; +// Ignore the vl operand. def SplatFPOp : PatFrag<(ops node:$op), (riscv_vfmv_v_f_vl node:$op, srcvalue)>; +def sew8simm5 : ComplexPattern", []>; +def sew16simm5 : ComplexPattern", []>; +def sew32simm5 : ComplexPattern", []>; +def sew64simm5 : ComplexPattern", []>; + +def sew8uimm5 : ComplexPattern", []>; +def sew16uimm5 : ComplexPattern", []>; +def sew32uimm5 : ComplexPattern", []>; +def sew64uimm5 : ComplexPattern", []>; + class VPatBinaryVL_VV; +class VPatBinaryVL_XI : + Pat<(result_type (vop + (vop_type vop_reg_class:$rs1), + (vop_type (SplatPatKind xop_kind:$rs2)), + (mask_type true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast(instruction_name#_#suffix#_# vlmul.MX) + vop_reg_class:$rs1, + xop_kind:$rs2, + GPR:$vl, sew)>; + multiclass VPatBinaryVL_VV_VX { foreach vti = AllIntegerVectors in { def : VPatBinaryVL_VV; - // FIXME: Support splats. + def : VPatBinaryVL_XI; } } @@ -136,7 +172,15 @@ multiclass VPatBinaryVL_VV_VX_VI; - // FIXME: Support splats. + def : VPatBinaryVL_XI; + def : VPatBinaryVL_XI(SplatPat#_#ImmType), + ImmType>; } } @@ -214,6 +258,20 @@ foreach mti = AllMasks in { // 12.1. Vector Single-Width Integer Add and Subtract defm "" : VPatBinaryVL_VV_VX_VI; defm "" : VPatBinaryVL_VV_VX; +// Handle VRSUB specially since it's the only integer binary op with reversed +// pattern operands +foreach vti = AllIntegerVectors in { + def : Pat<(riscv_sub_vl (vti.Vector (SplatPat GPR:$rs2)), + (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), + (XLenVT (VLOp GPR:$vl))), + (!cast("PseudoVRSUB_VX_"# vti.LMul.MX) + vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.SEW)>; + def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)), + (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), + (XLenVT (VLOp GPR:$vl))), + (!cast("PseudoVRSUB_VI_"# vti.LMul.MX) + vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.SEW)>; +} // 12.5. Vector Bitwise Logical Instructions defm "" : VPatBinaryVL_VV_VX_VI; @@ -240,6 +298,18 @@ defm "" : VPatBinaryVL_VV_VX; defm "" : VPatBinaryVL_VV_VX; defm "" : VPatBinaryVL_VV_VX; +// 12.17. 
+foreach vti = AllIntegerVectors in {
+  def : Pat<(vti.Vector (riscv_vmv_v_x_vl GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
+             $rs2, GPR:$vl, vti.SEW)>;
+  defvar ImmPat = !cast<ComplexPattern>("sew"#vti.SEW#"simm5");
+  def : Pat<(vti.Vector (riscv_vmv_v_x_vl (ImmPat XLenVT:$imm5),
+                                          (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
+             XLenVT:$imm5, GPR:$vl, vti.SEW)>;
+}
+
 } // Predicates = [HasStdExtV]
 
 let Predicates = [HasStdExtV, HasStdExtF] in {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 6ebaa47..5903754 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -4577,3 +4577,1638 @@ define void @umax_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
   store <4 x i64> %c, <4 x i64>* %x
   ret void
 }
+
+define void @add_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: add_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vadd.vi v25, v25, -1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 -1, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = add <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @add_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: add_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vadd.vi v25, v25, -1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 -1, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = add <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @add_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: add_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vadd.vi v25, v25, -1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 -1, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = add <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @add_iv_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: add_iv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vadd.vi v25, v25, 1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 1, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = add <16 x i8> %c, %a
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @add_iv_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: add_iv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vadd.vi v25, v25, 1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 1, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = add <8 x i16> %c, %a
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @add_iv_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: add_iv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vadd.vi v25, v25, 1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 1, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = add <4 x i32> %c, %a
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @add_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: add_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vadd.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = add <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @add_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: add_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vadd.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = add <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @add_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: add_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vadd.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = add <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @add_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: add_xv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vadd.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = add <16 x i8> %c, %a
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @add_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: add_xv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vadd.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = add <8 x i16> %c, %a
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @add_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: add_xv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vadd.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = add <4 x i32> %c, %a
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @sub_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: sub_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    addi a1, zero, -1
+; CHECK-NEXT:    vsub.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 -1, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = sub <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @sub_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: sub_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    addi a1, zero, -1
+; CHECK-NEXT:    vsub.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 -1, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = sub <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @sub_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: sub_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    addi a1, zero, -1
+; CHECK-NEXT:    vsub.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 -1, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = sub <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @sub_iv_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: sub_iv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vrsub.vi v25, v25, 1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 1, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = sub <16 x i8> %c, %a
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @sub_iv_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: sub_iv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vrsub.vi v25, v25, 1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 1, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = sub <8 x i16> %c, %a
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @sub_iv_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: sub_iv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vrsub.vi v25, v25, 1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 1, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = sub <4 x i32> %c, %a
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @sub_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: sub_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsub.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = sub <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @sub_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: sub_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsub.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = sub <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @sub_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: sub_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsub.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = sub <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @sub_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: sub_xv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vrsub.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = sub <16 x i8> %c, %a
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @sub_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: sub_xv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vrsub.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = sub <8 x i16> %c, %a
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @sub_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: sub_xv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vrsub.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = sub <4 x i32> %c, %a
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @mul_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: mul_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vmul.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = mul <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @mul_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: mul_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vmul.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = mul <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @mul_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: mul_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vmul.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = mul <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @mul_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: mul_xv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vmul.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = mul <16 x i8> %c, %a
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @mul_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: mul_xv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vmul.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = mul <8 x i16> %c, %a
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @mul_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: mul_xv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vmul.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = mul <4 x i32> %c, %a
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @and_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: and_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, -2
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 -2, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = and <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @and_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: and_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, -2
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 -2, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = and <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @and_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: and_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, -2
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 -2, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = and <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @and_iv_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: and_iv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 1, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = and <16 x i8> %c, %a
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @and_iv_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: and_iv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 1, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = and <8 x i16> %c, %a
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @and_iv_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: and_iv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 1, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = and <4 x i32> %c, %a
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @and_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: and_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vand.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = and <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @and_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: and_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vand.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = and <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @and_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: and_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vand.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = and <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @and_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: and_xv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vand.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = and <16 x i8> %c, %a
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @and_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: and_xv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vand.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = and <8 x i16> %c, %a
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @and_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: and_xv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vand.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = and <4 x i32> %c, %a
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @or_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: or_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vor.vi v25, v25, -2
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 -2, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = or <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @or_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: or_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vor.vi v25, v25, -2
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 -2, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = or <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @or_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: or_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vor.vi v25, v25, -2
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 -2, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = or <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @or_iv_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: or_iv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vor.vi v25, v25, 1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 1, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = or <16 x i8> %c, %a
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @or_iv_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: or_iv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vor.vi v25, v25, 1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 1, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = or <8 x i16> %c, %a
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @or_iv_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: or_iv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vor.vi v25, v25, 1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 1, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = or <4 x i32> %c, %a
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @or_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: or_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vor.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = or <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @or_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: or_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vor.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = or <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @or_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: or_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vor.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = or <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @or_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: or_xv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vor.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = or <16 x i8> %c, %a
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @or_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: or_xv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vor.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = or <8 x i16> %c, %a
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @or_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: or_xv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vor.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = or <4 x i32> %c, %a
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @xor_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: xor_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vxor.vi v25, v25, -1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 -1, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = xor <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @xor_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: xor_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vxor.vi v25, v25, -1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 -1, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = xor <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @xor_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: xor_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vxor.vi v25, v25, -1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 -1, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = xor <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @xor_iv_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: xor_iv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vxor.vi v25, v25, 1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 1, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = xor <16 x i8> %c, %a
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @xor_iv_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: xor_iv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vxor.vi v25, v25, 1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 1, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = xor <8 x i16> %c, %a
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @xor_iv_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: xor_iv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vxor.vi v25, v25, 1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 1, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = xor <4 x i32> %c, %a
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @xor_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: xor_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vxor.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = xor <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @xor_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: xor_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vxor.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = xor <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @xor_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: xor_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vxor.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = xor <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @xor_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: xor_xv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vxor.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = xor <16 x i8> %c, %a
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @xor_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: xor_xv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vxor.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = xor <8 x i16> %c, %a
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @xor_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: xor_xv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vxor.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = xor <4 x i32> %c, %a
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @lshr_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: lshr_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsrl.vi v25, v25, 7
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 7, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = lshr <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @lshr_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: lshr_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsrl.vi v25, v25, 15
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 15, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = lshr <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @lshr_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: lshr_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsrl.vi v25, v25, 31
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 31, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = lshr <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @lshr_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: lshr_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = lshr <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @lshr_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: lshr_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = lshr <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @lshr_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: lshr_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = lshr <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @ashr_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: ashr_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsra.vi v25, v25, 7
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 7, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = ashr <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @ashr_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: ashr_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsra.vi v25, v25, 15
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 15, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = ashr <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @ashr_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: ashr_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsra.vi v25, v25, 31
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 31, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = ashr <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @ashr_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: ashr_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsra.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = ashr <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @ashr_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: ashr_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsra.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = ashr <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @ashr_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: ashr_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsra.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = ashr <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @shl_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: shl_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsll.vi v25, v25, 7
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 7, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = shl <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @shl_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: shl_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsll.vi v25, v25, 15
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 15, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = shl <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @shl_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: shl_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsll.vi v25, v25, 31
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 31, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = shl <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @shl_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: shl_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsll.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = shl <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @shl_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: shl_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsll.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = shl <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @shl_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: shl_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsll.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = shl <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @sdiv_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: sdiv_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vdiv.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = sdiv <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @sdiv_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: sdiv_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vdiv.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = sdiv <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @sdiv_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: sdiv_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vdiv.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = sdiv <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @srem_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: srem_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vrem.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = srem <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @srem_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: srem_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vrem.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = srem <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @srem_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: srem_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vrem.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = srem <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @udiv_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: udiv_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vdivu.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = udiv <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @udiv_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: udiv_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vdivu.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = udiv <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @udiv_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: udiv_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vdivu.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = udiv <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @urem_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: urem_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vremu.vx v25, v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> undef, i8 %y, i32 0
+  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %d = urem <16 x i8> %a, %c
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @urem_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: urem_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vremu.vx v25, v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = insertelement <8 x i16> undef, i16 %y, i32 0
+  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %d = urem <8 x i16> %a, %c
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @urem_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: urem_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vremu.vx v25, v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = insertelement <4 x i32> undef, i32 %y, i32 0
+  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %d = urem <4 x i32> %a, %c
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
-- 
2.7.4