From 91fad1167abaab49e608298e35b7272b43d7caf5 Mon Sep 17 00:00:00 2001
From: Simon Moll
Date: Wed, 16 Mar 2022 11:31:19 +0100
Subject: [PATCH] [VE] v512|256 f32|64 fneg isel and tests

fneg instruction isel and tests. We do this also in preparation for
fused negate-multiply-add fp operations.

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D121620
---
 llvm/lib/Target/VE/VECustomDAG.cpp        | 10 +++++++
 llvm/lib/Target/VE/VECustomDAG.h          |  1 +
 llvm/lib/Target/VE/VVPISelLowering.cpp    |  7 +++--
 llvm/lib/Target/VE/VVPInstrInfo.td        | 11 ++++++++
 llvm/lib/Target/VE/VVPInstrPatternsVec.td | 47 +++++++++++++++++++++++++++++++
 llvm/lib/Target/VE/VVPNodes.def           | 10 +++++++
 llvm/test/CodeGen/VE/Packed/vec_fneg.ll   | 15 ++++++++++
 llvm/test/CodeGen/VE/Vector/vec_fneg.ll   | 25 ++++++++++++++++
 8 files changed, 123 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/VE/Packed/vec_fneg.ll
 create mode 100644 llvm/test/CodeGen/VE/Vector/vec_fneg.ll

diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
index 11e87d2..8fa0a50 100644
--- a/llvm/lib/Target/VE/VECustomDAG.cpp
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -133,6 +133,16 @@ bool isVVPOrVEC(unsigned Opcode) {
   return false;
 }
 
+bool isVVPUnaryOp(unsigned VVPOpcode) {
+  switch (VVPOpcode) {
+#define ADD_UNARY_VVP_OP(VVPNAME, ...)                                         \
+  case VEISD::VVPNAME:                                                         \
+    return true;
+#include "VVPNodes.def"
+  }
+  return false;
+}
+
 bool isVVPBinaryOp(unsigned VVPOpcode) {
   switch (VVPOpcode) {
 #define ADD_BINARY_VVP_OP(VVPNAME, ...)                                        \
diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
index 8cf1458..0d35c09 100644
--- a/llvm/lib/Target/VE/VECustomDAG.h
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -23,6 +23,7 @@ namespace llvm {
 
 Optional<unsigned> getVVPOpcode(unsigned Opcode);
 
+bool isVVPUnaryOp(unsigned Opcode);
 bool isVVPBinaryOp(unsigned Opcode);
 
 bool isVVPReductionOp(unsigned Opcode);
diff --git a/llvm/lib/Target/VE/VVPISelLowering.cpp b/llvm/lib/Target/VE/VVPISelLowering.cpp
index 36e71e7..73807b8 100644
--- a/llvm/lib/Target/VE/VVPISelLowering.cpp
+++ b/llvm/lib/Target/VE/VVPISelLowering.cpp
@@ -79,11 +79,12 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
   if (!Mask)
     Mask = CDAG.getConstantMask(Packing, true);
 
-  if (isVVPBinaryOp(VVPOpcode)) {
-    assert(LegalVecVT.isSimple());
+  assert(LegalVecVT.isSimple());
+  if (isVVPUnaryOp(VVPOpcode))
+    return CDAG.getNode(VVPOpcode, LegalVecVT, {Op->getOperand(0), Mask, AVL});
+  if (isVVPBinaryOp(VVPOpcode))
     return CDAG.getNode(VVPOpcode, LegalVecVT,
                         {Op->getOperand(0), Op->getOperand(1), Mask, AVL});
-  }
   if (isVVPReductionOp(VVPOpcode)) {
     auto SrcHasStart = hasReductionStartParam(Op->getOpcode());
     SDValue StartV = SrcHasStart ? Op->getOperand(0) : SDValue();
diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
index 594aa61..a4e4984 100644
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -62,6 +62,15 @@ def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc.
   IsVLVT<4>
 ]>;
 
+// UnaryFPOp(x,mask,vl)
+def SDTFPUnaryOpVVP : SDTypeProfile<1, 3, [
+  SDTCisSameAs<0, 1>,
+  SDTCisFP<0>,
+  SDTCisInt<2>,
+  SDTCisSameNumEltsAs<0, 2>,
+  IsVLVT<3>
+]>;
+
 // BinaryFPOp(x,y,mask,vl)
 def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
   SDTCisSameAs<0, 1>,
@@ -152,6 +161,8 @@ def vvp_srl : SDNode<"VEISD::VVP_SRL", SDTIntBinOpVVP>;
 def vvp_sra : SDNode<"VEISD::VVP_SRA", SDTIntBinOpVVP>;
 def vvp_shl : SDNode<"VEISD::VVP_SHL", SDTIntBinOpVVP>;
 
+def vvp_fneg : SDNode<"VEISD::VVP_FNEG", SDTFPUnaryOpVVP>;
+
 def vvp_fadd : SDNode<"VEISD::VVP_FADD", SDTFPBinOpVVP>;
 def c_vvp_fadd : vvp_commutative<vvp_fadd>;
 def vvp_fsub : SDNode<"VEISD::VVP_FSUB", SDTFPBinOpVVP>;
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
index 0efbb4d..33316ad 100644
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -131,6 +131,53 @@ defm : VectorScatter;
 defm : VectorScatter;
 
+/// FNEG {
+// Directly modify the sign bit to flip the sign.
+
+// Set sign bits in a pack of <2 x f32>.
+def packed_fneg_imm : OutPatFrag<(ins ),
+                                 (i64 (SLLri (i64 (ORim 1, (i32 32))), 31))>;
+
+
+multiclass FNeg<ValueType DataVT> {
+  // Masked with select.
+  def : Pat<(vvp_select (vvp_fneg DataVT:$vx, (v256i1 srcvalue), (i32 srcvalue)),
+                        DataVT:$vfalse,
+                        v256i1:$mask,
+                        i32:$avl),
+            (VXORmvml_v (i32 1), $vx, $mask, $avl, $vfalse)>;
+
+  // Unmasked.
+  def : Pat<(vvp_fneg DataVT:$vx, (v256i1 true_mask), i32:$avl),
+            (VXORmvl (i32 1), $vx, $avl)>;
+
+  // Masked.
+  def : Pat<(vvp_fneg DataVT:$vx, v256i1:$mask, i32:$avl),
+            (VXORmvml (i32 1), $vx, $mask, $avl)>;
+}
+
+defm : FNeg<v256f32>;
+defm : FNeg<v256f64>;
+
+///// Packed FNeg /////
+
+// Masked with select.
+def : Pat<(vvp_select (vvp_fneg v512f32:$vx, (v512i1 srcvalue), (i32 srcvalue)),
+                      v512f32:$vfalse,
+                      v512i1:$mask,
+                      i32:$avl),
+          (v512f32 (PVXORrvml_v (packed_fneg_imm ), $vx, $mask, $avl, $vfalse))>;
+
+// Unmasked.
+def : Pat<(vvp_fneg v512f32:$vx, (v512i1 true_mask), i32:$avl),
+          (v512f32 (PVXORrvl (packed_fneg_imm ), $vx, $avl))>;
+
+// Masked.
+def : Pat<(vvp_fneg v512f32:$vx, v512i1:$mask, i32:$avl),
+          (v512f32 (PVXORrvml (packed_fneg_imm ), $vx, $mask, $avl))>;
+
+/// } FNEG
+
 multiclass Binary_rv {
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
index 2d8c694..a605886 100644
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -24,6 +24,14 @@
 #define ADD_VVP_OP(X, Y)
 #endif
 
+/// ADD_UNARY_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPName is a VVP Unary operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
+#ifndef ADD_UNARY_VVP_OP
+#define ADD_UNARY_VVP_OP(VVPNAME,SDNAME) \
+  ADD_VVP_OP(VVPNAME,SDNAME)
+#endif
+
 /// ADD_BINARY_VVP_OP(VVPNAME,SDNAME)
 /// \p VVPName is a VVP Binary operator.
 /// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
@@ -100,6 +108,7 @@ ADD_BINARY_VVP_OP_COMPACT(OR) REGISTER_PACKED(VVP_OR)
 ADD_BINARY_VVP_OP_COMPACT(XOR) REGISTER_PACKED(VVP_XOR)
 
 // FP arithmetic.
+ADD_UNARY_VVP_OP(VVP_FNEG, FNEG) HANDLE_VP_TO_VVP(VP_FNEG, VVP_FNEG) REGISTER_PACKED(VVP_FNEG)
 ADD_BINARY_VVP_OP_COMPACT(FADD) REGISTER_PACKED(VVP_FADD)
 ADD_BINARY_VVP_OP_COMPACT(FSUB) REGISTER_PACKED(VVP_FSUB)
 ADD_BINARY_VVP_OP_COMPACT(FMUL) REGISTER_PACKED(VVP_FMUL)
@@ -117,6 +126,7 @@ HANDLE_VP_TO_VVP(VP_MERGE, VVP_SELECT)
 
 #undef ADD_BINARY_VVP_OP
 #undef ADD_TERNARY_VVP_OP
+#undef ADD_UNARY_VVP_OP
 #undef ADD_BINARY_VVP_OP_COMPACT
 #undef ADD_REDUCE_VVP_OP
 #undef ADD_VVP_OP
diff --git a/llvm/test/CodeGen/VE/Packed/vec_fneg.ll b/llvm/test/CodeGen/VE/Packed/vec_fneg.ll
new file mode 100644
index 0000000..5209398
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Packed/vec_fneg.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+define fastcc <512 x float> @test_vec_fneg_v512f32_v(<512 x float> %v) {
+; CHECK-LABEL: test_vec_fneg_v512f32_v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    or %s0, 1, (32)1
+; CHECK-NEXT:    sll %s0, %s0, 31
+; CHECK-NEXT:    lea %s1, 512
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvxor %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %neg = fneg <512 x float> %v
+  ret <512 x float> %neg
+}
diff --git a/llvm/test/CodeGen/VE/Vector/vec_fneg.ll b/llvm/test/CodeGen/VE/Vector/vec_fneg.ll
new file mode 100644
index 0000000..db1cafc
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vec_fneg.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+define fastcc <256 x float> @test_vec_fneg_v256f32_v(<256 x float> %v) {
+; CHECK-LABEL: test_vec_fneg_v256f32_v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vxor %v0, (1)1, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %neg = fneg <256 x float> %v
+  ret <256 x float> %neg
+}
+
+define fastcc <256 x double> @test_vec_fneg_v256f64_v(<256 x double> %v) {
+; CHECK-LABEL: test_vec_fneg_v256f64_v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vxor %v0, (1)1, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %neg = fneg <256 x double> %v
+  ret <256 x double> %neg
+}
+
-- 
2.7.4
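
Note: the tests above exercise only the unmasked VXORmvl/PVXORrvl patterns;
the masked VXORmvml pattern is reached through predicated vector code. Below
is a minimal sketch of how the masked path could be exercised, assuming the
generic llvm.vp.fneg intrinsic (the HANDLE_VP_TO_VVP line above maps VP_FNEG
onto VVP_FNEG). The function name is illustrative and the CHECK lines are
deliberately omitted; this is not part of the committed patch.

; Hypothetical masked test, not part of this commit. The %mask and %evl
; operands become the Mask/AVL operands of VVP_FNEG, so selection should
; hit the masked VXORmvml pattern instead of the unmasked VXORmvl one.
define fastcc <256 x float> @test_vp_fneg_v256f32_masked(<256 x float> %v, <256 x i1> %mask, i32 %evl) {
  %neg = call <256 x float> @llvm.vp.fneg.v256f32(<256 x float> %v, <256 x i1> %mask, i32 %evl)
  ret <256 x float> %neg
}
declare <256 x float> @llvm.vp.fneg.v256f32(<256 x float>, <256 x i1>, i32)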