From: Craig Topper
Date: Tue, 16 Feb 2021 17:26:22 +0000 (-0800)
Subject: [RISCV] Add support for fixed vector mask logic operations.
X-Git-Tag: llvmorg-14-init~14983
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=07ca13fe0766ded6fd69a6729275020e6b4c0b1b;p=platform%2Fupstream%2Fllvm.git

[RISCV] Add support for fixed vector mask logic operations.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D96741
---
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f7b7fc8..c2ac6a5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -531,6 +531,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
         // Operations below are different for between masks and other vectors.
         if (VT.getVectorElementType() == MVT::i1) {
+          setOperationAction(ISD::AND, VT, Custom);
+          setOperationAction(ISD::OR, VT, Custom);
+          setOperationAction(ISD::XOR, VT, Custom);
           setOperationAction(ISD::SETCC, VT, Custom);
           continue;
         }
@@ -1209,11 +1212,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::MUL:
     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
   case ISD::AND:
-    return lowerToScalableOp(Op, DAG, RISCVISD::AND_VL);
+    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
+                                              RISCVISD::AND_VL);
   case ISD::OR:
-    return lowerToScalableOp(Op, DAG, RISCVISD::OR_VL);
+    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
+                                              RISCVISD::OR_VL);
   case ISD::XOR:
-    return lowerToScalableOp(Op, DAG, RISCVISD::XOR_VL);
+    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
+                                              RISCVISD::XOR_VL);
   case ISD::SDIV:
     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
   case ISD::SREM:
@@ -2231,8 +2237,19 @@ RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
 }
 
+SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
+    SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
+  MVT VT = Op.getSimpleValueType();
+
+  if (VT.getVectorElementType() == MVT::i1)
+    return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
+
+  return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
+}
+
 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
-                                               unsigned NewOpc) const {
+                                               unsigned NewOpc,
+                                               bool HasMask) const {
   MVT VT = Op.getSimpleValueType();
   assert(useRVVForFixedLengthVectorVT(VT) &&
          "Only expected to lower fixed length vector operation!");
@@ -2258,7 +2275,8 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
   SDLoc DL(Op);
   SDValue Mask, VL;
   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-  Ops.push_back(Mask);
+  if (HasMask)
+    Ops.push_back(Mask);
   Ops.push_back(VL);
 
   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 74fe07a..9b0503a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -407,8 +407,11 @@ private:
   SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
-  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
-                            unsigned NewOpc) const;
+  SDValue lowerFixedLengthVectorLogicOpToRVV(SDValue Op, SelectionDAG &DAG,
+                                             unsigned MaskOpc,
+                                             unsigned VecOpc) const;
+  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc,
+                            bool HasMask = true) const;
 
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index da51548..7eb569d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -126,6 +126,9 @@ def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;
 
 def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>;
 
+def riscv_vmnot_vl : PatFrag<(ops node:$rs, node:$vl),
+                             (riscv_vmxor_vl node:$rs, true_mask, node:$vl)>;
+
 // Ignore the vl operand.
 def SplatFPOp : PatFrag<(ops node:$op),
                         (riscv_vfmv_v_f_vl node:$op, srcvalue)>;
@@ -558,15 +561,41 @@ foreach mti = AllMasks in {
             (!cast<Instruction>("PseudoVMXOR_MM_" # mti.LMul.MX)
                  VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
 
-  // FIXME: Add remaining mask instructions.
-  def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmor_vl VR:$rs1, VR:$rs2,
+  def : Pat<(mti.Mask (riscv_vmand_vl (riscv_vmnot_vl VR:$rs1,
+                                                      (XLenVT (VLOp GPR:$vl))),
+                                      VR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMANDNOT_MM_" # mti.LMul.MX)
+                 VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
+  def : Pat<(mti.Mask (riscv_vmor_vl (riscv_vmnot_vl VR:$rs1,
+                                                     (XLenVT (VLOp GPR:$vl))),
+                                     VR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMORNOT_MM_" # mti.LMul.MX)
+                 VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
+  // XOR is associative so we need 2 patterns for VMXNOR.
+  def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmnot_vl VR:$rs1,
+                                                      (XLenVT (VLOp GPR:$vl))),
+                                      VR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMXNOR_MM_" # mti.LMul.MX)
+                 VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
+
+  def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmand_vl VR:$rs1, VR:$rs2,
+                                                      (XLenVT (VLOp GPR:$vl))),
+                                      (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
+                 VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
+  def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmor_vl VR:$rs1, VR:$rs2,
                                                      (XLenVT (VLOp GPR:$vl))),
-                                      true_mask, (XLenVT (VLOp GPR:$vl)))),
+                                      (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMNOR_MM_" # mti.LMul.MX)
                  VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
+  def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmxor_vl VR:$rs1, VR:$rs2,
+                                                      (XLenVT (VLOp GPR:$vl))),
+                                      (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMXNOR_MM_" # mti.LMul.MX)
+                 VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
 
   // Match the not idiom to the vnot.mm pseudo.
-  def : Pat<(mti.Mask (riscv_vmxor_vl VR:$rs, true_mask, (XLenVT (VLOp GPR:$vl)))),
+  def : Pat<(mti.Mask (riscv_vmnot_vl VR:$rs, (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
                  VR:$rs, VR:$rs, GPR:$vl, mti.SEW)>;
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll
new file mode 100644
index 0000000..89eeebf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+define void @and_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
+; CHECK-LABEL: and_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmand.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i1>, <8 x i1>* %x
+  %b = load <8 x i1>, <8 x i1>* %y
+  %c = and <8 x i1> %a, %b
+  store <8 x i1> %c, <8 x i1>* %x
+  ret void
+}
+
+define void @or_v16i1(<16 x i1>* %x, <16 x i1>* %y) {
+; CHECK-LABEL: or_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmor.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i1>, <16 x i1>* %x
+  %b = load <16 x i1>, <16 x i1>* %y
+  %c = or <16 x i1> %a, %b
+  store <16 x i1> %c, <16 x i1>* %x
+  ret void
+}
+
+define void @xor_v32i1(<32 x i1>* %x, <32 x i1>* %y) {
+; CHECK-LABEL: xor_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 32
+; CHECK-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmxor.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <32 x i1>, <32 x i1>* %x
+  %b = load <32 x i1>, <32 x i1>* %y
+  %c = xor <32 x i1> %a, %b
+  store <32 x i1> %c, <32 x i1>* %x
+  ret void
+}
+
+define void @not_v64i1(<64 x i1>* %x, <64 x i1>* %y) {
+; CHECK-LABEL: not_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 64
+; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vmnand.mm v25, v25, v25
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <64 x i1>, <64 x i1>* %x
+  %b = load <64 x i1>, <64 x i1>* %y
+  %c = xor <64 x i1> %a, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  store <64 x i1> %c, <64 x i1>* %x
+  ret void
+}
+
+define void @andnot_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
+; CHECK-LABEL: andnot_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmandnot.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i1>, <8 x i1>* %x
+  %b = load <8 x i1>, <8 x i1>* %y
+  %c = xor <8 x i1> %a, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  %d = and <8 x i1> %b, %c
+  store <8 x i1> %d, <8 x i1>* %x
+  ret void
+}
+
+define void @ornot_v16i1(<16 x i1>* %x, <16 x i1>* %y) {
+; CHECK-LABEL: ornot_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmornot.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i1>, <16 x i1>* %x
+  %b = load <16 x i1>, <16 x i1>* %y
+  %c = xor <16 x i1> %a, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  %d = or <16 x i1> %b, %c
+  store <16 x i1> %d, <16 x i1>* %x
+  ret void
+}
+
+define void @xornot_v32i1(<32 x i1>* %x, <32 x i1>* %y) {
+; CHECK-LABEL: xornot_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 32
+; CHECK-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmxnor.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <32 x i1>, <32 x i1>* %x
+  %b = load <32 x i1>, <32 x i1>* %y
+  %c = xor <32 x i1> %a, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  %d = xor <32 x i1> %b, %c
+  store <32 x i1> %d, <32 x i1>* %x
+  ret void
+}
+
+define void @nand_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
+; CHECK-LABEL: nand_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmnand.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i1>, <8 x i1>* %x
+  %b = load <8 x i1>, <8 x i1>* %y
+  %c = and <8 x i1> %a, %b
+  %d = xor <8 x i1> %c, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  store <8 x i1> %d, <8 x i1>* %x
+  ret void
+}
+
+define void @nor_v16i1(<16 x i1>* %x, <16 x i1>* %y) {
+; CHECK-LABEL: nor_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmnor.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i1>, <16 x i1>* %x
+  %b = load <16 x i1>, <16 x i1>* %y
+  %c = or <16 x i1> %a, %b
+  %d = xor <16 x i1> %c, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  store <16 x i1> %d, <16 x i1>* %x
+  ret void
+}
+
+define void @xnor_v32i1(<32 x i1>* %x, <32 x i1>* %y) {
+; CHECK-LABEL: xnor_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 32
+; CHECK-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmxnor.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <32 x i1>, <32 x i1>* %x
+  %b = load <32 x i1>, <32 x i1>* %y
+  %c = xor <32 x i1> %a, %b
+  %d = xor <32 x i1> %c, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  store <32 x i1> %d, <32 x i1>* %x
+  ret void
+}
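
To try the new lowering outside of the lit harness, any of the functions above can be fed to llc on its own. Below is a minimal sketch based on the and_v8i1 test; the file name mask-and.ll and the choice of the riscv64 triple are illustrative only, and per the autogenerated CHECK lines above the i1 vector 'and' is expected to select vmand.mm.

    ; mask-and.ll (hypothetical standalone copy of the and_v8i1 test above)
    define void @and_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
      %a = load <8 x i1>, <8 x i1>* %x
      %b = load <8 x i1>, <8 x i1>* %y
      ; i1 vector AND is now custom-lowered for fixed vectors (mask path, no extra mask operand)
      %c = and <8 x i1> %a, %b
      store <8 x i1> %c, <8 x i1>* %x
      ret void
    }

    ; Invoke llc with the same options as the RUN lines above, e.g.:
    ;   llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
    ;       -riscv-v-vector-bits-min=128 mask-and.ll -o -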