From: Craig Topper
Date: Mon, 29 Mar 2021 17:11:18 +0000 (-0700)
Subject: [RISCV] When custom iseling masked loads/stores, copy the mask into V0 instead of...
X-Git-Tag: llvmorg-14-init~11010
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3dd4aa7d09599507d1f801ffe4bec4c9eebbb8da;p=platform%2Fupstream%2Fllvm.git

[RISCV] When custom iseling masked loads/stores, copy the mask into V0
instead of virtual register.

This matches what we do in our isel patterns.

In our internal testing we've found this is needed to make the fast
register allocator happy at -O0. Otherwise it may assign V0 to an
earlier operand and find itself with no registers left when it reaches
the mask operand. By using V0 explicitly, the fast register allocator
sees it when it checks for phys register uses before it starts
allocating vregs. I'll try to update this with a test case.

Unfortunately, this does appear to prevent some instruction reordering
by the pre-RA scheduler, which leads to the increased spills seen in
some tests. I suspect that problem could already occur for other
instructions that already use V0 directly.

There's a lot of repeated code here that could do with some wrapper
functions. I'm not sure whether those should sit at the level of the
new code that deals with V0; that would require multiple output
parameters to pass the glue, chain and register back. Maybe they should
sit at a higher level, over the entire set of push_backs.

Reviewed By: frasercrmck, HsiangKai

Differential Revision: https://reviews.llvm.org/D99367
---

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index d1f4cc2..4d262a5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -134,8 +134,12 @@ void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
   MVT XLenVT = Subtarget->getXLenVT();
   RISCVVLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
   SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+
+  SDValue Chain = Node->getOperand(0);
+  SDValue Glue;
+
   unsigned CurOp = 2;
-  SmallVector Operands;
+  SmallVector Operands;
   if (IsMasked) {
     SmallVector Regs(Node->op_begin() + CurOp,
                      Node->op_begin() + CurOp + NF);
@@ -148,13 +152,20 @@ void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
   Operands.push_back(Base); // Base pointer.
   if (IsStrided)
     Operands.push_back(Node->getOperand(CurOp++)); // Stride.
-  if (IsMasked)
-    Operands.push_back(Node->getOperand(CurOp++)); // Mask.
+  if (IsMasked) {
+    // Mask needs to be copied to V0.
+    SDValue Mask = Node->getOperand(CurOp++);
+    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
+    Glue = Chain.getValue(1);
+    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
+  }
   SDValue VL;
   selectVLOp(Node->getOperand(CurOp++), VL);
   Operands.push_back(VL);
   Operands.push_back(SEW);
-  Operands.push_back(Node->getOperand(0)); // Chain.
+  Operands.push_back(Chain); // Chain.
+  if (Glue)
+    Operands.push_back(Glue);
   const RISCV::VLSEGPseudo *P =
       RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, ScalarSize,
                             static_cast(LMUL));
@@ -184,6 +195,9 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
   RISCVVLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
   SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);

+  SDValue Chain = Node->getOperand(0);
+  SDValue Glue;
+
   unsigned CurOp = 2;
   SmallVector Operands;
   if (IsMasked) {
@@ -196,13 +210,20 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
   SDValue Base;
   SelectBaseAddr(Node->getOperand(CurOp++), Base);
   Operands.push_back(Base); // Base pointer.
-  if (IsMasked)
-    Operands.push_back(Node->getOperand(CurOp++)); // Mask.
+  if (IsMasked) {
+    // Mask needs to be copied to V0.
+    SDValue Mask = Node->getOperand(CurOp++);
+    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
+    Glue = Chain.getValue(1);
+    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
+  }
   SDValue VL;
   selectVLOp(Node->getOperand(CurOp++), VL);
   Operands.push_back(VL);
   Operands.push_back(SEW);
-  Operands.push_back(Node->getOperand(0)); // Chain.
+  Operands.push_back(Chain); // Chain.
+  if (Glue)
+    Operands.push_back(Glue);
   const RISCV::VLSEGPseudo *P =
       RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
                             ScalarSize, static_cast(LMUL));
@@ -235,8 +256,12 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
   MVT XLenVT = Subtarget->getXLenVT();
   RISCVVLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
   SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+
+  SDValue Chain = Node->getOperand(0);
+  SDValue Glue;
+
   unsigned CurOp = 2;
-  SmallVector Operands;
+  SmallVector Operands;
   if (IsMasked) {
     SmallVector Regs(Node->op_begin() + CurOp,
                      Node->op_begin() + CurOp + NF);
@@ -249,13 +274,20 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
   Operands.push_back(Base); // Base pointer.
   Operands.push_back(Node->getOperand(CurOp++)); // Index.
   MVT IndexVT = Operands.back()->getSimpleValueType(0);
-  if (IsMasked)
-    Operands.push_back(Node->getOperand(CurOp++)); // Mask.
+  if (IsMasked) {
+    // Mask needs to be copied to V0.
+    SDValue Mask = Node->getOperand(CurOp++);
+    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
+    Glue = Chain.getValue(1);
+    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
+  }
   SDValue VL;
   selectVLOp(Node->getOperand(CurOp++), VL);
   Operands.push_back(VL);
   Operands.push_back(SEW);
-  Operands.push_back(Node->getOperand(0)); // Chain.
+  Operands.push_back(Chain); // Chain.
+  if (Glue)
+    Operands.push_back(Glue);

   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
          "Element count mismatch");
@@ -297,7 +329,11 @@ void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
   SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
   SmallVector Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
-  SmallVector Operands;
+
+  SDValue Chain = Node->getOperand(0);
+  SDValue Glue;
+
+  SmallVector Operands;
   Operands.push_back(StoreVal);
   unsigned CurOp = 2 + NF;
   SDValue Base;
@@ -305,13 +341,20 @@ void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
   Operands.push_back(Base); // Base pointer.
   if (IsStrided)
     Operands.push_back(Node->getOperand(CurOp++)); // Stride.
-  if (IsMasked)
-    Operands.push_back(Node->getOperand(CurOp++)); // Mask.
+  if (IsMasked) {
+    // Mask needs to be copied to V0.
+    SDValue Mask = Node->getOperand(CurOp++);
+    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
+    Glue = Chain.getValue(1);
+    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
+  }
   SDValue VL;
   selectVLOp(Node->getOperand(CurOp++), VL);
   Operands.push_back(VL);
   Operands.push_back(SEW);
-  Operands.push_back(Node->getOperand(0)); // Chain.
+  Operands.push_back(Chain); // Chain.
+  if (Glue)
+    Operands.push_back(Glue);
   const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
       NF, IsMasked, IsStrided, ScalarSize, static_cast(LMUL));
   MachineSDNode *Store =
@@ -334,9 +377,13 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
   MVT XLenVT = Subtarget->getXLenVT();
   RISCVVLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
   SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
-  SmallVector Operands;
   SmallVector Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
+
+  SDValue Chain = Node->getOperand(0);
+  SDValue Glue;
+
+  SmallVector Operands;
   Operands.push_back(StoreVal);
   unsigned CurOp = 2 + NF;
   SDValue Base;
@@ -344,13 +391,20 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
   Operands.push_back(Base); // Base pointer.
   Operands.push_back(Node->getOperand(CurOp++)); // Index.
   MVT IndexVT = Operands.back()->getSimpleValueType(0);
-  if (IsMasked)
-    Operands.push_back(Node->getOperand(CurOp++)); // Mask.
+  if (IsMasked) {
+    // Mask needs to be copied to V0.
+    SDValue Mask = Node->getOperand(CurOp++);
+    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
+    Glue = Chain.getValue(1);
+    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
+  }
   SDValue VL;
   selectVLOp(Node->getOperand(CurOp++), VL);
   Operands.push_back(VL);
   Operands.push_back(SEW);
-  Operands.push_back(Node->getOperand(0)); // Chain.
+  Operands.push_back(Chain); // Chain.
+  if (Glue)
+    Operands.push_back(Glue);

   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
          "Element count mismatch");
@@ -620,8 +674,11 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       MVT XLenVT = Subtarget->getXLenVT();
       SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);

+      SDValue Chain = Node->getOperand(0);
+      SDValue Glue;
+
       unsigned CurOp = 2;
-      SmallVector Operands;
+      SmallVector Operands;
       if (IsMasked)
         Operands.push_back(Node->getOperand(CurOp++));
       SDValue Base;
@@ -629,13 +686,20 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       Operands.push_back(Base); // Base pointer.
       Operands.push_back(Node->getOperand(CurOp++)); // Index.
       MVT IndexVT = Operands.back()->getSimpleValueType(0);
-      if (IsMasked)
-        Operands.push_back(Node->getOperand(CurOp++)); // Mask.
+      if (IsMasked) {
+        // Mask needs to be copied to V0.
+        SDValue Mask = Node->getOperand(CurOp++);
+        Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
+        Glue = Chain.getValue(1);
+        Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
+      }
       SDValue VL;
       selectVLOp(Node->getOperand(CurOp++), VL);
       Operands.push_back(VL);
       Operands.push_back(SEW);
-      Operands.push_back(Node->getOperand(0)); // Chain.
+      Operands.push_back(Chain); // Chain.
+      if (Glue)
+        Operands.push_back(Glue);

       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
              "Element count mismatch");
@@ -672,8 +736,11 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       unsigned SEWImm = (IntNo == Intrinsic::riscv_vle1) ? 8 : ScalarSize;
       SDValue SEW = CurDAG->getTargetConstant(SEWImm, DL, XLenVT);

+      SDValue Chain = Node->getOperand(0);
+      SDValue Glue;
+
       unsigned CurOp = 2;
-      SmallVector Operands;
+      SmallVector Operands;
       if (IsMasked)
         Operands.push_back(Node->getOperand(CurOp++));
       SDValue Base;
@@ -681,13 +748,20 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       Operands.push_back(Base); // Base pointer.
      if (IsStrided)
        Operands.push_back(Node->getOperand(CurOp++)); // Stride.
-      if (IsMasked)
-        Operands.push_back(Node->getOperand(CurOp++)); // Mask.
+      if (IsMasked) {
+        // Mask needs to be copied to V0.
+        SDValue Mask = Node->getOperand(CurOp++);
+        Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
+        Glue = Chain.getValue(1);
+        Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
+      }
       SDValue VL;
       selectVLOp(Node->getOperand(CurOp++), VL);
       Operands.push_back(VL);
       Operands.push_back(SEW);
-      Operands.push_back(Node->getOperand(0)); // Chain.
+      Operands.push_back(Chain); // Chain.
+      if (Glue)
+        Operands.push_back(Glue);

       RISCVVLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
       const RISCV::VLEPseudo *P =
@@ -711,6 +785,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       MVT XLenVT = Subtarget->getXLenVT();
       SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);

+      SDValue Chain = Node->getOperand(0);
+      SDValue Glue;
+
       unsigned CurOp = 2;
       SmallVector Operands;
       if (IsMasked)
@@ -718,13 +795,20 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       SDValue Base;
       SelectBaseAddr(Node->getOperand(CurOp++), Base);
       Operands.push_back(Base); // Base pointer.
-      if (IsMasked)
-        Operands.push_back(Node->getOperand(CurOp++)); // Mask.
+      if (IsMasked) {
+        // Mask needs to be copied to V0.
+        SDValue Mask = Node->getOperand(CurOp++);
+        Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
+        Glue = Chain.getValue(1);
+        Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
+      }
       SDValue VL;
       selectVLOp(Node->getOperand(CurOp++), VL);
       Operands.push_back(VL);
       Operands.push_back(SEW);
-      Operands.push_back(Node->getOperand(0)); // Chain.
+      Operands.push_back(Chain); // Chain.
+      if (Glue)
+        Operands.push_back(Glue);

       RISCVVLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
       const RISCV::VLEPseudo *P =
@@ -842,7 +926,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
       unsigned CurOp = 2;
-      SmallVector Operands;
+      SmallVector Operands;
       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
       SDValue Base;
       SelectBaseAddr(Node->getOperand(CurOp++), Base);
       Operands.push_back(Base); // Base pointer.
@@ -893,7 +977,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       SDValue SEW = CurDAG->getTargetConstant(SEWImm, DL, XLenVT);

       unsigned CurOp = 2;
-      SmallVector Operands;
+      SmallVector Operands;
       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
       SDValue Base;
       SelectBaseAddr(Node->getOperand(CurOp++), Base);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
index e8add2a..6e8f905 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
@@ -368,15 +368,15 @@ define void @masked_load_v32f64(<32 x double>* %a, <32 x double>* %m_ptr, <32 x
 ; RV32-NEXT: vle64.v v8, (a1)
 ; RV32-NEXT: vle64.v v16, (a3)
 ; RV32-NEXT: fcvt.d.w ft0, zero
-; RV32-NEXT: vmfeq.vf v0, v8, ft0
-; RV32-NEXT: vmfeq.vf v26, v16, ft0
-; RV32-NEXT: vle64.v v8, (a0), v0.t
-; RV32-NEXT: addi a0, a0, 128
-; RV32-NEXT: vmv1r.v v0, v26
+; RV32-NEXT: vmfeq.vf v25, v8, ft0
+; RV32-NEXT: vmfeq.vf v0, v16, ft0
+; RV32-NEXT: addi a1, a0, 128
+; RV32-NEXT: vle64.v v8, (a1), v0.t
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vle64.v v16, (a0), v0.t
-; RV32-NEXT: vse64.v v8, (a2)
+; RV32-NEXT: vse64.v v16, (a2)
 ; RV32-NEXT: addi a0, a2, 128
-; RV32-NEXT: vse64.v v16, (a0)
+; RV32-NEXT: vse64.v v8, (a0)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: masked_load_v32f64:
@@ -386,15 +386,15 @@ define void @masked_load_v32f64(<32 x double>* %a, <32 x double>* %m_ptr, <32 x
 ; RV64-NEXT: vle64.v v8, (a1)
 ; RV64-NEXT: vle64.v v16, (a3)
 ; RV64-NEXT: fmv.d.x ft0, zero
-; RV64-NEXT: vmfeq.vf v0, v8, ft0
-; RV64-NEXT: vmfeq.vf v26, v16, ft0
-; RV64-NEXT: vle64.v v8, (a0), v0.t
-; RV64-NEXT: addi a0, a0, 128
-; RV64-NEXT: vmv1r.v v0, v26
+; RV64-NEXT: vmfeq.vf v25, v8, ft0
+; RV64-NEXT: vmfeq.vf v0, v16, ft0
+; RV64-NEXT: addi a1, a0, 128
+; RV64-NEXT: vle64.v v8, (a1), v0.t
+; RV64-NEXT: vmv1r.v v0, v25
 ; RV64-NEXT: vle64.v v16, (a0), v0.t
-; RV64-NEXT: vse64.v v8, (a2)
+; RV64-NEXT: vse64.v v16, (a2)
 ; RV64-NEXT: addi a0, a2, 128
-; RV64-NEXT: vse64.v v16, (a0)
+; RV64-NEXT: vse64.v v8, (a0)
 ; RV64-NEXT: ret
   %m = load <32 x double>, <32 x double>* %m_ptr
   %mask = fcmp oeq <32 x double> %m, zeroinitializer
@@ -432,15 +432,15 @@ define void @masked_load_v64f32(<64 x float>* %a, <64 x float>* %m_ptr, <64 x fl
 ; CHECK-NEXT: vle32.v v8, (a1)
 ; CHECK-NEXT: vle32.v v16, (a3)
 ; CHECK-NEXT: fmv.w.x ft0, zero
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
-; CHECK-NEXT: vmfeq.vf v26, v16, ft0
-; CHECK-NEXT: vle32.v v8, (a0), v0.t
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vmv1r.v v0, v26
+; CHECK-NEXT: vmfeq.vf v25, v8, ft0
+; CHECK-NEXT: vmfeq.vf v0, v16, ft0
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle32.v v8, (a1), v0.t
+; CHECK-NEXT: vmv1r.v v0, v25
 ; CHECK-NEXT: vle32.v v16, (a0), v0.t
-; CHECK-NEXT: vse32.v v8, (a2)
+; CHECK-NEXT: vse32.v v16, (a2)
 ; CHECK-NEXT: addi a0, a2, 128
-; CHECK-NEXT: vse32.v v16, (a0)
+; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
   %m = load <64 x float>, <64 x float>* %m_ptr
   %mask = fcmp oeq <64 x float> %m, zeroinitializer
@@ -459,15 +459,15 @@ define void @masked_load_v128f16(<128 x half>* %a, <128 x half>* %m_ptr, <128 x
 ; CHECK-NEXT: vle16.v v8, (a1)
 ; CHECK-NEXT: vle16.v v16, (a3)
 ; CHECK-NEXT: fmv.h.x ft0, zero
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
-; CHECK-NEXT: vmfeq.vf v26, v16, ft0
-; CHECK-NEXT: vle16.v v8, (a0), v0.t
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vmv1r.v v0, v26
+; CHECK-NEXT: vmfeq.vf v25, v8, ft0
+; CHECK-NEXT: vmfeq.vf v0, v16, ft0
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle16.v v8, (a1), v0.t
+; CHECK-NEXT: vmv1r.v v0, v25
 ; CHECK-NEXT: vle16.v v16, (a0), v0.t
-; CHECK-NEXT: vse16.v v8, (a2)
+; CHECK-NEXT: vse16.v v16, (a2)
 ; CHECK-NEXT: addi a0, a2, 128
-; CHECK-NEXT: vse16.v v16, (a0)
+; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
   %m = load <128 x half>, <128 x half>* %m_ptr
   %mask = fcmp oeq <128 x half> %m, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
index 5d3ff7c..5462f26 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
@@ -460,23 +460,35 @@ declare <32 x i32> @llvm.masked.load.v32i32(<32 x i32>*, i32, <32 x i1>, <32 x i
 define void @masked_load_v32i64(<32 x i64>* %a, <32 x i64>* %m_ptr, <32 x i64>* %res_ptr) nounwind {
 ; RV32-LABEL: masked_load_v32i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
 ; RV32-NEXT: addi a3, a1, 128
 ; RV32-NEXT: vsetivli a4, 16, e64,m8,ta,mu
 ; RV32-NEXT: vle64.v v8, (a3)
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: vle64.v v16, (a1)
 ; RV32-NEXT: addi a1, zero, 32
 ; RV32-NEXT: vsetvli a1, a1, e32,m8,ta,mu
-; RV32-NEXT: vmv.v.i v24, 0
+; RV32-NEXT: vmv.v.i v8, 0
 ; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu
-; RV32-NEXT: vmseq.vv v0, v16, v24
-; RV32-NEXT: vmseq.vv v16, v8, v24
-; RV32-NEXT: vle64.v v8, (a0), v0.t
-; RV32-NEXT: addi a0, a0, 128
-; RV32-NEXT: vmv1r.v v0, v16
+; RV32-NEXT: vmseq.vv v25, v16, v8
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vmseq.vv v0, v16, v8
+; RV32-NEXT: addi a1, a0, 128
+; RV32-NEXT: vle64.v v8, (a1), v0.t
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vle64.v v16, (a0), v0.t
-; RV32-NEXT: vse64.v v8, (a2)
+; RV32-NEXT: vse64.v v16, (a2)
 ; RV32-NEXT: addi a0, a2, 128
-; RV32-NEXT: vse64.v v16, (a0)
+; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: masked_load_v32i64:
@@ -485,15 +497,15 @@ define void @masked_load_v32i64(<32 x i64>* %a, <32 x i64>* %m_ptr, <32 x i64>*
 ; RV64-NEXT: vsetivli a4, 16, e64,m8,ta,mu
 ; RV64-NEXT: vle64.v v8, (a1)
 ; RV64-NEXT: vle64.v v16, (a3)
-; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vmseq.vi v26, v16, 0
-; RV64-NEXT: vle64.v v8, (a0), v0.t
-; RV64-NEXT: addi a0, a0, 128
-; RV64-NEXT: vmv1r.v v0, v26
+; RV64-NEXT: vmseq.vi v25, v8, 0
+; RV64-NEXT: vmseq.vi v0, v16, 0
+; RV64-NEXT: addi a1, a0, 128
+; RV64-NEXT: vle64.v v8, (a1), v0.t
+; RV64-NEXT: vmv1r.v v0, v25
 ; RV64-NEXT: vle64.v v16, (a0), v0.t
-; RV64-NEXT: vse64.v v8, (a2)
+; RV64-NEXT: vse64.v v16, (a2)
 ; RV64-NEXT: addi a0, a2, 128
-; RV64-NEXT: vse64.v v16, (a0)
+; RV64-NEXT: vse64.v v8, (a0)
 ; RV64-NEXT: ret
   %m = load <32 x i64>, <32 x i64>* %m_ptr
   %mask = icmp eq <32 x i64> %m, zeroinitializer
@@ -547,15 +559,15 @@ define void @masked_load_v64i32(<64 x i32>* %a, <64 x i32>* %m_ptr, <64 x i32>*
 ; CHECK-NEXT: vsetvli a4, a4, e32,m8,ta,mu
 ; CHECK-NEXT: vle32.v v8, (a1)
 ; CHECK-NEXT: vle32.v v16, (a3)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmseq.vi v26, v16, 0
-; CHECK-NEXT: vle32.v v8, (a0), v0.t
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vmv1r.v v0, v26
+; CHECK-NEXT: vmseq.vi v25, v8, 0
+; CHECK-NEXT: vmseq.vi v0, v16, 0
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle32.v v8, (a1), v0.t
+; CHECK-NEXT: vmv1r.v v0, v25
 ; CHECK-NEXT: vle32.v v16, (a0), v0.t
-; CHECK-NEXT: vse32.v v8, (a2)
+; CHECK-NEXT: vse32.v v16, (a2)
 ; CHECK-NEXT: addi a0, a2, 128
-; CHECK-NEXT: vse32.v v16, (a0)
+; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
   %m = load <64 x i32>, <64 x i32>* %m_ptr
   %mask = icmp eq <64 x i32> %m, zeroinitializer
@@ -591,15 +603,15 @@ define void @masked_load_v256i8(<256 x i8>* %a, <256 x i8>* %m_ptr, <256 x i8>*
 ; CHECK-NEXT: vsetvli a4, a4, e8,m8,ta,mu
 ; CHECK-NEXT: vle8.v v8, (a1)
 ; CHECK-NEXT: vle8.v v16, (a3)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmseq.vi v26, v16, 0
-; CHECK-NEXT: vle8.v v8, (a0), v0.t
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vmv1r.v v0, v26
+; CHECK-NEXT: vmseq.vi v25, v8, 0
+; CHECK-NEXT: vmseq.vi v0, v16, 0
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle8.v v8, (a1), v0.t
+; CHECK-NEXT: vmv1r.v v0, v25
 ; CHECK-NEXT: vle8.v v16, (a0), v0.t
-; CHECK-NEXT: vse8.v v8, (a2)
+; CHECK-NEXT: vse8.v v16, (a2)
 ; CHECK-NEXT: addi a0, a2, 128
-; CHECK-NEXT: vse8.v v16, (a0)
+; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
   %m = load <256 x i8>, <256 x i8>* %m_ptr
   %mask = icmp eq <256 x i8> %m, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index 72321ea..27c64fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -2266,6 +2266,7 @@ define @mgather_baseidx_nxv32i8(i8* %base,
 ;
 ; RV64-LABEL: mgather_baseidx_nxv32i8:
 ; RV64: # %bb.0:
+; RV64-NEXT: vmv1r.v v25, v0
 ; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
 ; RV64-NEXT: vsext.vf8 v16, v8
 ; RV64-NEXT: vsetvli a1, zero, e8,m1,tu,mu
@@ -2273,18 +2274,16 @@ define @mgather_baseidx_nxv32i8(i8* %base,
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: srli a1, a1, 3
 ; RV64-NEXT: vsetvli a2, zero, e8,mf4,ta,mu
-; RV64-NEXT: vslidedown.vx v25, v0, a1
-; RV64-NEXT: vmv1r.v v26, v0
+; RV64-NEXT: vslidedown.vx v0, v0, a1
 ; RV64-NEXT: vsetvli a2, zero, e64,m8,ta,mu
 ; RV64-NEXT: vsext.vf8 v16, v9
 ; RV64-NEXT: vsetvli a2, zero, e8,m1,tu,mu
-; RV64-NEXT: vmv1r.v v0, v25
 ; RV64-NEXT: vloxei64.v v13, (a0), v16, v0.t
 ; RV64-NEXT: slli a2, a1, 1
 ; RV64-NEXT: vsetvli a3, zero, e8,mf2,ta,mu
-; RV64-NEXT: vslidedown.vx v26, v26, a2
+; RV64-NEXT: vslidedown.vx v25, v25, a2
 ; RV64-NEXT: vsetvli a2, zero, e8,mf4,ta,mu
-; RV64-NEXT: vslidedown.vx v0, v26, a1
+; RV64-NEXT: vslidedown.vx v0, v25, a1
 ; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
 ; RV64-NEXT: vsext.vf8 v16, v11
 ; RV64-NEXT: vsetvli a1, zero, e8,m1,tu,mu
@@ -2292,7 +2291,7 @@ define @mgather_baseidx_nxv32i8(i8* %base,
 ; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
 ; RV64-NEXT: vsext.vf8 v16, v10
 ; RV64-NEXT: vsetvli a1, zero, e8,m1,tu,mu
-; RV64-NEXT: vmv1r.v v0, v26
+; RV64-NEXT: vmv1r.v v0, v25
 ; RV64-NEXT: vloxei64.v v14, (a0), v16, v0.t
 ; RV64-NEXT: vmv4r.v v8, v12
 ; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll b/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll
new file mode 100644
index 0000000..84523cf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zvlsseg,+experimental-zfh,+m \
+; RUN:   -regalloc=fast -verify-machineinstrs < %s | FileCheck %s
+
+; This test previously crashed with an error "ran out of registers during register allocation"
+
+declare void @llvm.riscv.vsseg2.mask.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, i16*, <vscale x 16 x i1>, i32)
+
+define void @test_vsseg2_mask_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i1> %mask, i32 %vl) {
+; CHECK-LABEL: test_vsseg2_mask_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv4r.v v4, v8
+; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
+; CHECK-NEXT: vsseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+  tail call void @llvm.riscv.vsseg2.mask.nxv16i16(<vscale x 16 x i16> %val, <vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i1> %mask, i32 %vl)
+  ret void
+}
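
As a rough illustration of the wrapper-function idea mentioned in the commit message, here is a minimal sketch of what a shared helper for the repeated mask-to-V0 sequence could look like. The helper name, signature and placement are hypothetical and not part of this commit; the SelectionDAG calls are the same ones the patch itself uses.

    // Hypothetical helper (not in the patch): copy the mask operand into V0 and
    // append the physical-register operand. Chain and Glue are in/out parameters
    // so the caller can forward them to the pseudo instruction's operand list.
    static void addMaskOperandToV0(SelectionDAG *CurDAG, const SDLoc &DL,
                                   SDValue Mask,
                                   SmallVectorImpl<SDValue> &Operands,
                                   SDValue &Chain, SDValue &Glue) {
      // The CopyToReg produces a glue value; pushing that glue onto the pseudo's
      // operand list later ties the pseudo to this copy of the mask into V0.
      Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
      Glue = Chain.getValue(1);
      Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
    }

Under that assumption, each repeated block would reduce to a single call such as
addMaskOperandToV0(CurDAG, DL, Node->getOperand(CurOp++), Operands, Chain, Glue);
placed before the VL/SEW/Chain push_backs.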