From 0278c9ceb6c2475f1b1213fcc4ffaf667c6b7731 Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Thu, 13 Oct 2022 23:11:48 +0900 Subject: [PATCH] [VE] Change the way to lower select Change to use VEISD::CMOV in combineSelect for better optimization. Support VEISD::CMOV in combineTRUNCATE also to optimize truncate. Merge functions to handle condition codes to VE.h. And add basic CMOV patterns to VEInstrInfo.td. Update regression tests also. Reviewed By: efocht Differential Revision: https://reviews.llvm.org/D135878 --- llvm/lib/Target/VE/VE.h | 96 +++++++++++++++++++ llvm/lib/Target/VE/VEISelDAGToDAG.cpp | 94 ------------------- llvm/lib/Target/VE/VEISelLowering.cpp | 134 ++++++++++++++++++++++----- llvm/lib/Target/VE/VEISelLowering.h | 3 + llvm/lib/Target/VE/VEInstrInfo.td | 152 +++++++++++++++++++++---------- llvm/test/CodeGen/VE/Scalar/select.ll | 21 ----- llvm/test/CodeGen/VE/Scalar/select_cc.ll | 7 -- 7 files changed, 314 insertions(+), 193 deletions(-) diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h index 2794d14..4f7ec91 100644 --- a/llvm/lib/Target/VE/VE.h +++ b/llvm/lib/Target/VE/VE.h @@ -16,6 +16,8 @@ #include "MCTargetDesc/VEMCTargetDesc.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" @@ -145,6 +147,10 @@ inline static VECC::CondCode stringToVEFCondCode(StringRef S) { .Default(VECC::UNKNOWN); } +inline static bool isIntVECondCode(VECC::CondCode CC) { + return CC < VECC::CC_AF; +} + inline static unsigned VECondCodeToVal(VECC::CondCode CC) { switch (CC) { case VECC::CC_IG: @@ -196,6 +202,80 @@ inline static unsigned VECondCodeToVal(VECC::CondCode CC) { } } +/// Convert a DAG integer condition code to a VE ICC condition. 
+inline static VECC::CondCode intCondCode2Icc(ISD::CondCode CC) { + switch (CC) { + default: + llvm_unreachable("Unknown integer condition code!"); + case ISD::SETEQ: + return VECC::CC_IEQ; + case ISD::SETNE: + return VECC::CC_INE; + case ISD::SETLT: + return VECC::CC_IL; + case ISD::SETGT: + return VECC::CC_IG; + case ISD::SETLE: + return VECC::CC_ILE; + case ISD::SETGE: + return VECC::CC_IGE; + case ISD::SETULT: + return VECC::CC_IL; + case ISD::SETULE: + return VECC::CC_ILE; + case ISD::SETUGT: + return VECC::CC_IG; + case ISD::SETUGE: + return VECC::CC_IGE; + } +} + +/// Convert a DAG floating point condition code to a VE FCC condition. +inline static VECC::CondCode fpCondCode2Fcc(ISD::CondCode CC) { + switch (CC) { + default: + llvm_unreachable("Unknown fp condition code!"); + case ISD::SETFALSE: + return VECC::CC_AF; + case ISD::SETEQ: + case ISD::SETOEQ: + return VECC::CC_EQ; + case ISD::SETNE: + case ISD::SETONE: + return VECC::CC_NE; + case ISD::SETLT: + case ISD::SETOLT: + return VECC::CC_L; + case ISD::SETGT: + case ISD::SETOGT: + return VECC::CC_G; + case ISD::SETLE: + case ISD::SETOLE: + return VECC::CC_LE; + case ISD::SETGE: + case ISD::SETOGE: + return VECC::CC_GE; + case ISD::SETO: + return VECC::CC_NUM; + case ISD::SETUO: + return VECC::CC_NAN; + case ISD::SETUEQ: + return VECC::CC_EQNAN; + case ISD::SETUNE: + return VECC::CC_NENAN; + case ISD::SETULT: + return VECC::CC_LNAN; + case ISD::SETUGT: + return VECC::CC_GNAN; + case ISD::SETULE: + return VECC::CC_LENAN; + case ISD::SETUGE: + return VECC::CC_GENAN; + case ISD::SETTRUE: + return VECC::CC_AT; + } +} + inline static VECC::CondCode VEValToCondCode(unsigned Val, bool IsInteger) { if (IsInteger) { switch (Val) { @@ -320,6 +400,22 @@ inline static VERD::RoundingMode VEValToRD(unsigned Val) { llvm_unreachable("Invalid branch predicates"); } +/// getImmVal - get immediate representation of integer value +inline static uint64_t getImmVal(const ConstantSDNode *N) { + return N->getSExtValue(); +} + 
+/// getFpImmVal - get immediate representation of floating point value +inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) { + const APInt &Imm = N->getValueAPF().bitcastToAPInt(); + uint64_t Val = Imm.getZExtValue(); + if (Imm.getBitWidth() == 32) { + // The immediate value of a float is placed in the higher 32 bits on VE. + Val <<= 32; + } + return Val; +} + // MImm - Special immediate value of sequential bit stream of 0 or 1. // See VEInstrInfo.td for details. inline static bool isMImmVal(uint64_t Val) { diff --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp index 15fd257..0301b46 100644 --- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp +++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp @@ -20,100 +20,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -//===----------------------------------------------------------------------===// -// Instruction Selector Implementation -//===----------------------------------------------------------------------===// - -/// Convert a DAG integer condition code to a VE ICC condition. -inline static VECC::CondCode intCondCode2Icc(ISD::CondCode CC) { - switch (CC) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ISD::SETEQ: - return VECC::CC_IEQ; - case ISD::SETNE: - return VECC::CC_INE; - case ISD::SETLT: - return VECC::CC_IL; - case ISD::SETGT: - return VECC::CC_IG; - case ISD::SETLE: - return VECC::CC_ILE; - case ISD::SETGE: - return VECC::CC_IGE; - case ISD::SETULT: - return VECC::CC_IL; - case ISD::SETULE: - return VECC::CC_ILE; - case ISD::SETUGT: - return VECC::CC_IG; - case ISD::SETUGE: - return VECC::CC_IGE; - } -} - -/// Convert a DAG floating point condition code to a VE FCC condition. 
-inline static VECC::CondCode fpCondCode2Fcc(ISD::CondCode CC) { - switch (CC) { - default: - llvm_unreachable("Unknown fp condition code!"); - case ISD::SETFALSE: - return VECC::CC_AF; - case ISD::SETEQ: - case ISD::SETOEQ: - return VECC::CC_EQ; - case ISD::SETNE: - case ISD::SETONE: - return VECC::CC_NE; - case ISD::SETLT: - case ISD::SETOLT: - return VECC::CC_L; - case ISD::SETGT: - case ISD::SETOGT: - return VECC::CC_G; - case ISD::SETLE: - case ISD::SETOLE: - return VECC::CC_LE; - case ISD::SETGE: - case ISD::SETOGE: - return VECC::CC_GE; - case ISD::SETO: - return VECC::CC_NUM; - case ISD::SETUO: - return VECC::CC_NAN; - case ISD::SETUEQ: - return VECC::CC_EQNAN; - case ISD::SETUNE: - return VECC::CC_NENAN; - case ISD::SETULT: - return VECC::CC_LNAN; - case ISD::SETUGT: - return VECC::CC_GNAN; - case ISD::SETULE: - return VECC::CC_LENAN; - case ISD::SETUGE: - return VECC::CC_GENAN; - case ISD::SETTRUE: - return VECC::CC_AT; - } -} - -/// getImmVal - get immediate representation of integer value -inline static uint64_t getImmVal(const ConstantSDNode *N) { - return N->getSExtValue(); -} - -/// getFpImmVal - get immediate representation of floating point value -inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) { - const APInt &Imm = N->getValueAPF().bitcastToAPInt(); - uint64_t Val = Imm.getZExtValue(); - if (Imm.getBitWidth() == 32) { - // Immediate value of float place places at higher bits on VE. - Val <<= 32; - } - return Val; -} - //===--------------------------------------------------------------------===// /// VEDAGToDAGISel - VE specific code to select VE machine /// instructions for SelectionDAG operations. 
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index 32de4c6..4fe6aff 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -914,6 +914,7 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM, // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::TRUNCATE); + setTargetDAGCombine(ISD::SELECT); // Set function alignment to 16 bytes setMinFunctionAlignment(Align(16)); @@ -931,6 +932,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((VEISD::NodeType)Opcode) { case VEISD::FIRST_NUMBER: break; + TARGET_NODE_CASE(CMOV) TARGET_NODE_CASE(CALL) TARGET_NODE_CASE(EH_SJLJ_LONGJMP) TARGET_NODE_CASE(EH_SJLJ_SETJMP) @@ -2685,6 +2687,69 @@ VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } } +static bool isMImm(SDValue V) { + EVT VT = V.getValueType(); + if (VT.isVector()) + return false; + + if (VT.isInteger()) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V)) + return isMImmVal(getImmVal(C)); + } else if (VT.isFloatingPoint()) { + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) { + if (VT == MVT::f32) { + // The float value is placed in the higher bits, so ignore the lower 32 bits. + return isMImm32Val(getFpImmVal(C) >> 32); + } else if (VT == MVT::f64) { + return isMImmVal(getFpImmVal(C)); + } + } + } + return false; +} + +SDValue VETargetLowering::combineSelect(SDNode *N, + DAGCombinerInfo &DCI) const { + assert(N->getOpcode() == ISD::SELECT && + "Should be called with a SELECT node"); + ISD::CondCode CC = ISD::CondCode::SETNE; + SDValue Cond = N->getOperand(0); + SDValue True = N->getOperand(1); + SDValue False = N->getOperand(2); + + // We handle only scalar SELECT. + EVT VT = N->getValueType(0); + if (VT.isVector()) + return SDValue(); + + // Perform combineSelect after legalizing the DAG. 
+ if (!DCI.isAfterLegalizeDAG()) + return SDValue(); + + EVT VT0 = Cond.getValueType(); + if (isMImm(True)) { + // VE's condition move can handle MImm in True clause, so nothing to do. + } else if (isMImm(False)) { + // VE's condition move can handle MImm in True clause, so swap True and + // False clauses if False has MImm value. And, update condition code. + std::swap(True, False); + CC = getSetCCInverse(CC, VT0); + } + + SDLoc DL(N); + SelectionDAG &DAG = DCI.DAG; + VECC::CondCode VECCVal; + if (VT0.isFloatingPoint()) { + VECCVal = fpCondCode2Fcc(CC); + } else { + VECCVal = intCondCode2Icc(CC); + } + SDValue Ops[] = {Cond, True, False, + DAG.getConstant(VECCVal, DL, MVT::i32)}; + return DAG.getNode(VEISD::CMOV, DL, VT, Ops); +} + +static bool isI32InsnAllUses(const SDNode *User, const SDNode *N); static bool isI32Insn(const SDNode *User, const SDNode *N) { switch (User->getOpcode()) { default: @@ -2717,6 +2782,17 @@ static bool isI32Insn(const SDNode *User, const SDNode *N) { if (User->getOperand(2).getNode() != N && User->getOperand(3).getNode() != N) return true; + return isI32InsnAllUses(User, N); + case VEISD::CMOV: + // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe. + // However, trunc in true or false clauses is not safe. + if (User->getOperand(1).getNode() != N && + User->getOperand(2).getNode() != N && + isa<ConstantSDNode>(User->getOperand(3))) { + VECC::CondCode VECCVal = static_cast<VECC::CondCode>( + cast<ConstantSDNode>(User->getOperand(3))->getZExtValue()); + return isIntVECondCode(VECCVal); + } [[fallthrough]]; case ISD::AND: case ISD::OR: @@ -2725,33 +2801,39 @@ static bool isI32Insn(const SDNode *User, const SDNode *N) { case ISD::CopyToReg: // Check all use of selections, bit operations, and copies. If all of them // are safe, optimize truncate to extract_subreg. - for (const SDNode *U : User->uses()) { - switch (U->getOpcode()) { - default: - // If the use is an instruction which treats the source operand as i32, - // it is safe to avoid truncate here. 
- if (isI32Insn(U, N)) - continue; - break; - case ISD::ANY_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: { - // Special optimizations to the combination of ext and trunc. - // (ext ... (select ... (trunc ...))) is safe to avoid truncate here - // since this truncate instruction clears higher 32 bits which is filled - // by one of ext instructions later. - assert(N->getValueType(0) == MVT::i32 && - "find truncate to not i32 integer"); - if (User->getOpcode() == ISD::SELECT_CC || - User->getOpcode() == ISD::SELECT) - continue; - break; - } - } - return false; + return isI32InsnAllUses(User, N); + } +} + +static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) { + // Check all use of User node. If all of them are safe, optimize + // truncate to extract_subreg. + for (const SDNode *U : User->uses()) { + switch (U->getOpcode()) { + default: + // If the use is an instruction which treats the source operand as i32, + // it is safe to avoid truncate here. + if (isI32Insn(U, N)) + continue; + break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: { + // Special optimizations to the combination of ext and trunc. + // (ext ... (select ... (trunc ...))) is safe to avoid truncate here + // since this truncate instruction clears higher 32 bits which is filled + // by one of ext instructions later. + assert(N->getValueType(0) == MVT::i32 && + "find truncate to not i32 integer"); + if (User->getOpcode() == ISD::SELECT_CC || + User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV) + continue; + break; } - return true; + } + return false; } + return true; } // Optimize TRUNCATE in DAG combining. 
Optimizing it in CUSTOM lower is @@ -2801,6 +2883,8 @@ SDValue VETargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; + case ISD::SELECT: + return combineSelect(N, DCI); case ISD::TRUNCATE: return combineTRUNCATE(N, DCI); } diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h index b9a29e4..6b8c37d 100644 --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -24,6 +24,8 @@ namespace VEISD { enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, + CMOV, // Select between two values using the result of comparison. + CALL, // A call instruction. EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. EH_SJLJ_SETJMP, // SjLj exception handling setjmp. @@ -200,6 +202,7 @@ public: /// Custom DAGCombine { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + SDValue combineSelect(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; /// } Custom DAGCombine diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td index d8eb651..83c4fb1 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -446,6 +446,11 @@ def retflag : SDNode<"VEISD::RET_FLAG", SDTNone, def getGOT : Operand; +// res = cmov cmp, t, f, cond +def SDT_Cmov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, + SDTCisVT<4, i32>]>; +def cmov : SDNode<"VEISD::CMOV", SDT_Cmov>; + def VEeh_sjlj_setjmp: SDNode<"VEISD::EH_SJLJ_SETJMP", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, @@ -715,21 +720,31 @@ multiclass RRSWPmopc, // e.g. CMOVL, CMOVW, CMOVD, and etc. let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0, cfw = ? 
in -multiclass RRCMOVmopc, RegisterClass RC> { +multiclass RRCMOVmopc, RegisterClass RC, ValueType Ty, + SDPatternOperator OpNode = null_frag, + Operand immOp = simm7> { def rr : RR; + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set i64:$sx, (OpNode Ty:$sy, i64:$sz, i64:$sd, + (i32 CCOp:$cfw)))]>; let cy = 0 in def ir : RR; + (ins CCOp:$cfw, immOp:$sy, I64:$sz, I64:$sd), + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set i64:$sx, (OpNode (Ty immOp:$sy), i64:$sz, i64:$sd, + (i32 CCOp:$cfw)))]>; let cz = 0 in def rm : RR; + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set i64:$sx, (OpNode Ty:$sy, (i64 mimm:$sz), i64:$sd, + (i32 CCOp:$cfw)))]>; let cy = 0, cz = 0 in def im : RR; + (ins CCOp:$cfw, immOp:$sy, mimm:$sz, I64:$sd), + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set i64:$sx, (OpNode (Ty immOp:$sy), (i64 mimm:$sz), i64:$sd, + (i32 CCOp:$cfw)))]>; } // Multiclass for floating point conversion instructions. @@ -1307,10 +1322,16 @@ def : Pat<(i32 (bswap (i32 mimm:$src))), (EXTRACT_SUBREG (BSWPmi (MIMM $src), 1), sub_i32)>; // Section 8.5.11 - CMOV (Conditional Move) -let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64>; -let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32>; -let cw = 0, cw2 = 1 in defm CMOVD : RRCMOVm<"cmov.d.${cfw}", 0x3B, I64>; -let cw = 1, cw2 = 1 in defm CMOVS : RRCMOVm<"cmov.s.${cfw}", 0x3B, F32>; +let isReMaterializable = 1 in { +let cw = 0, cw2 = 0 in +defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64, cmov>; +let cw = 1, cw2 = 0 in +defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32, i32, cmov>; +let cw = 0, cw2 = 1 in +defm CMOVD : RRCMOVm<"cmov.d.${cfw}", 0x3B, I64, f64, cmov, simm7fp>; +let cw = 1, cw2 = 1 in +defm CMOVS : RRCMOVm<"cmov.s.${cfw}", 0x3B, F32, f32, cmov, simm7fp>; +} def : MnemonicAlias<"cmov.l", "cmov.l.at">; def : MnemonicAlias<"cmov.w", "cmov.w.at">; def : MnemonicAlias<"cmov.d", "cmov.d.at">; @@ -2182,41 +2203,80 @@ def : Pat<(f128 (selectcc f64:$l, f64:$r, f128:$t, f128:$f, cond:$cond)), 
def : Pat<(f128 (selectcc f128:$l, f128:$r, f128:$t, f128:$f, cond:$cond)), (cmov128rr (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>; -// Generic SELECT pattern matches -// Use cmov.w for all cases since %pred holds i32. -// -// CMOV.w.ne %res, %tval, %tmp ; set tval if %tmp is true - -def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)), - (cmov32rr CC_INE, $pred, $t, $f)>; -def : Pat<(i32 (select i32:$pred, (i32 mimm:$t), i32:$f)), - (cmov32rm CC_INE, $pred, $t, $f)>; -def : Pat<(i32 (select i32:$pred, i32:$t, (i32 mimm:$f))), - (cmov32rm CC_IEQ, $pred, $f, $t)>; - -def : Pat<(i64 (select i32:$pred, i64:$t, i64:$f)), - (cmovrr CC_INE, $pred, $t, $f)>; -def : Pat<(i64 (select i32:$pred, (i64 mimm:$t), i64:$f)), - (cmovrm CC_INE, $pred, $t, $f)>; -def : Pat<(i64 (select i32:$pred, i64:$t, (i64 mimm:$f))), - (cmovrm CC_IEQ, $pred, $f, $t)>; - -def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)), - (cmov32rr CC_INE, $pred, $t, $f)>; -def : Pat<(f32 (select i32:$pred, (f32 mimmfp:$t), f32:$f)), - (cmov32rm CC_INE, $pred, $t, $f)>; -def : Pat<(f32 (select i32:$pred, f32:$t, (f32 mimmfp:$f))), - (cmov32rm CC_IEQ, $pred, $f, $t)>; - -def : Pat<(f64 (select i32:$pred, f64:$t, f64:$f)), - (cmovrr CC_INE, $pred, $t, $f)>; -def : Pat<(f64 (select i32:$pred, (f64 mimmfp:$t), f64:$f)), - (cmovrm CC_INE, $pred, $t, $f)>; -def : Pat<(f64 (select i32:$pred, f64:$t, (f64 mimmfp:$f))), - (cmovrm CC_IEQ, $pred, $f, $t)>; - -def : Pat<(f128 (select i32:$pred, f128:$t, f128:$f)), - (cmov128rr CC_INE, $pred, $t, $f)>; +// Generic CMOV pattern matches +// CMOV accepts i64 $t, $f, and result. So, we extend it to support +// i32/f32/f64/f128 $t, $f, and result. 
+ +// CMOV for i32 +multiclass CMOVI32m { + def : Pat<(i32 (cmov TY:$cmp, i32:$t, i32:$f, (i32 CCOp:$cond))), + (EXTRACT_SUBREG + (!cast(Insn#"rr") (CCOP $cond), $cmp, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; + def : Pat<(i32 (cmov TY:$cmp, (i32 mimm:$t), i32:$f, (i32 CCOp:$cond))), + (EXTRACT_SUBREG + (!cast(Insn#"rm") (CCOP $cond), $cmp, + (MIMM $t), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +} +defm : CMOVI32m; +defm : CMOVI32m; +defm : CMOVI32m; +defm : CMOVI32m; + +// CMOV for f32 +multiclass CMOVF32m { + def : Pat<(f32 (cmov TY:$cmp, f32:$t, f32:$f, (i32 CCOp:$cond))), + (EXTRACT_SUBREG + (!cast(Insn#"rr") + (CCOP $cond), $cmp, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; + def : Pat<(f32 (cmov TY:$cmp, (f32 mimmfp:$t), f32:$f, (i32 CCOp:$cond))), + (EXTRACT_SUBREG + (!cast(Insn#"rm") + (CCOP $cond), $cmp, (MIMMFP $t), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +} +defm : CMOVF32m; +defm : CMOVF32m; +defm : CMOVF32m; +defm : CMOVF32m; + +// CMOV for f64 +multiclass CMOVF64m { + def : Pat<(f64 (cmov TY:$cmp, f64:$t, f64:$f, (i32 CCOp:$cond))), + (!cast(Insn#"rr") (CCOP $cond), $cmp, $t, $f)>; + def : Pat<(f64 (cmov TY:$cmp, (f64 mimmfp:$t), f64:$f, (i32 CCOp:$cond))), + (!cast(Insn#"rm") (CCOP $cond), $cmp, (MIMMFP $t), + $f)>; +} +defm : CMOVF64m; +defm : CMOVF64m; +defm : CMOVF64m; +defm : CMOVF64m; + +// CMOV for f128 +multiclass CMOVF128m { + def : Pat<(f128 (cmov TY:$cmp, f128:$t, f128:$f, (i32 CCOp:$cond))), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (!cast(Insn#"rr") (CCOP $cond), $cmp, + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (!cast(Insn#"rr") (CCOP $cond), $cmp, + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +} +defm : CMOVF128m; +defm : CMOVF128m; +defm : CMOVF128m; 
+defm : CMOVF128m; // bitconvert def : Pat<(f64 (bitconvert i64:$src)), (COPY_TO_REGCLASS $src, I64)>; diff --git a/llvm/test/CodeGen/VE/Scalar/select.ll b/llvm/test/CodeGen/VE/Scalar/select.ll index eeb3f03..184513a 100644 --- a/llvm/test/CodeGen/VE/Scalar/select.ll +++ b/llvm/test/CodeGen/VE/Scalar/select.ll @@ -119,7 +119,6 @@ define zeroext i32 @select_u32_var(i1 zeroext %0, i32 zeroext %1, i32 zeroext %2 define i64 @select_i64_var(i1 zeroext %0, i64 %1, i64 %2) { ; CHECK-LABEL: select_i64_var: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, %s1, %s0 ; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: b.l.t (, %s10) @@ -131,7 +130,6 @@ define i64 @select_i64_var(i1 zeroext %0, i64 %1, i64 %2) { define i64 @select_u64_var(i1 zeroext %0, i64 %1, i64 %2) { ; CHECK-LABEL: select_u64_var: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, %s1, %s0 ; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: b.l.t (, %s10) @@ -143,7 +141,6 @@ define i64 @select_u64_var(i1 zeroext %0, i64 %1, i64 %2) { define i128 @select_i128_var(i1 zeroext %0, i128 %1, i128 %2) { ; CHECK-LABEL: select_i128_var: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s3, %s1, %s0 ; CHECK-NEXT: cmov.w.ne %s4, %s2, %s0 ; CHECK-NEXT: or %s0, 0, %s3 @@ -157,7 +154,6 @@ define i128 @select_i128_var(i1 zeroext %0, i128 %1, i128 %2) { define i128 @select_u128_var(i1 zeroext %0, i128 %1, i128 %2) { ; CHECK-LABEL: select_u128_var: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s3, %s1, %s0 ; CHECK-NEXT: cmov.w.ne %s4, %s2, %s0 ; CHECK-NEXT: or %s0, 0, %s3 @@ -171,7 +167,6 @@ define i128 @select_u128_var(i1 zeroext %0, i128 %1, i128 %2) { define float @select_float_var(i1 zeroext %0, float %1, float %2) { ; CHECK-LABEL: select_float_var: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, %s1, %s0 ; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: b.l.t (, %s10) @@ -183,7 +178,6 @@ 
define float @select_float_var(i1 zeroext %0, float %1, float %2) { define double @select_double_var(i1 zeroext %0, double %1, double %2) { ; CHECK-LABEL: select_double_var: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, %s1, %s0 ; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: b.l.t (, %s10) @@ -195,7 +189,6 @@ define double @select_double_var(i1 zeroext %0, double %1, double %2) { define fp128 @select_quad_var(i1 zeroext %0, fp128 %1, fp128 %2) { ; CHECK-LABEL: select_quad_var: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s4, %s2, %s0 ; CHECK-NEXT: cmov.w.ne %s5, %s3, %s0 ; CHECK-NEXT: or %s0, 0, %s4 @@ -285,7 +278,6 @@ define zeroext i32 @select_u32_mimm(i1 zeroext %0, i32 zeroext %1) { define i64 @select_i64_mimm(i1 zeroext %0, i64 %1) { ; CHECK-LABEL: select_i64_mimm: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s1, (48)0, %s0 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) @@ -297,7 +289,6 @@ define i64 @select_i64_mimm(i1 zeroext %0, i64 %1) { define i64 @select_u64_mimm(i1 zeroext %0, i64 %1) { ; CHECK-LABEL: select_u64_mimm: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s1, (48)0, %s0 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) @@ -309,7 +300,6 @@ define i64 @select_u64_mimm(i1 zeroext %0, i64 %1) { define i128 @select_i128_mimm(i1 zeroext %0, i128 %1) { ; CHECK-LABEL: select_i128_mimm: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s1, (48)0, %s0 ; CHECK-NEXT: cmov.w.ne %s2, (0)1, %s0 ; CHECK-NEXT: or %s0, 0, %s1 @@ -323,7 +313,6 @@ define i128 @select_i128_mimm(i1 zeroext %0, i128 %1) { define i128 @select_u128_mimm(i1 zeroext %0, i128 %1) { ; CHECK-LABEL: select_u128_mimm: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s1, (48)0, %s0 ; CHECK-NEXT: cmov.w.ne %s2, (0)1, %s0 ; CHECK-NEXT: or %s0, 0, %s1 @@ -337,7 +326,6 @@ define i128 
@select_u128_mimm(i1 zeroext %0, i128 %1) { define float @select_float_mimm(i1 zeroext %0, float %1) { ; CHECK-LABEL: select_float_mimm: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s1, (2)1, %s0 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) @@ -349,7 +337,6 @@ define float @select_float_mimm(i1 zeroext %0, float %1) { define double @select_double_mimm(i1 zeroext %0, double %1) { ; CHECK-LABEL: select_double_mimm: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s1, (2)1, %s0 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) @@ -366,7 +353,6 @@ define fp128 @select_quad_mimm(i1 zeroext %0, fp128 %1) { ; CHECK-NEXT: lea.sl %s1, .LCPI{{[0-9]+}}_0@hi(, %s1) ; CHECK-NEXT: ld %s4, 8(, %s1) ; CHECK-NEXT: ld %s5, (, %s1) -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, %s4, %s0 ; CHECK-NEXT: cmov.w.ne %s3, %s5, %s0 ; CHECK-NEXT: or %s0, 0, %s2 @@ -458,7 +444,6 @@ define zeroext i32 @select_mimm_u32(i1 zeroext %0, i32 zeroext %1) { define i64 @select_mimm_i64(i1 zeroext %0, i64 %1) { ; CHECK-LABEL: select_mimm_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.eq %s1, (48)0, %s0 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) @@ -470,7 +455,6 @@ define i64 @select_mimm_i64(i1 zeroext %0, i64 %1) { define i64 @select_mimm_u64(i1 zeroext %0, i64 %1) { ; CHECK-LABEL: select_mimm_u64: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.eq %s1, (48)0, %s0 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) @@ -482,7 +466,6 @@ define i64 @select_mimm_u64(i1 zeroext %0, i64 %1) { define i128 @select_mimm_i128(i1 zeroext %0, i128 %1) { ; CHECK-LABEL: select_mimm_i128: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.eq %s1, (48)0, %s0 ; CHECK-NEXT: cmov.w.eq %s2, (0)1, %s0 ; CHECK-NEXT: or %s0, 0, %s1 @@ -496,7 +479,6 @@ define i128 @select_mimm_i128(i1 zeroext %0, i128 %1) { define i128 
@select_mimm_u128(i1 zeroext %0, i128 %1) { ; CHECK-LABEL: select_mimm_u128: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.eq %s1, (48)0, %s0 ; CHECK-NEXT: cmov.w.eq %s2, (0)1, %s0 ; CHECK-NEXT: or %s0, 0, %s1 @@ -510,7 +492,6 @@ define i128 @select_mimm_u128(i1 zeroext %0, i128 %1) { define float @select_mimm_float(i1 zeroext %0, float %1) { ; CHECK-LABEL: select_mimm_float: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.eq %s1, (2)1, %s0 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) @@ -522,7 +503,6 @@ define float @select_mimm_float(i1 zeroext %0, float %1) { define double @select_mimm_double(i1 zeroext %0, double %1) { ; CHECK-LABEL: select_mimm_double: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.eq %s1, (2)1, %s0 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) @@ -539,7 +519,6 @@ define fp128 @select_mimm_quad(i1 zeroext %0, fp128 %1) { ; CHECK-NEXT: lea.sl %s1, .LCPI{{[0-9]+}}_0@hi(, %s1) ; CHECK-NEXT: ld %s4, 8(, %s1) ; CHECK-NEXT: ld %s5, (, %s1) -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s4, %s2, %s0 ; CHECK-NEXT: cmov.w.ne %s5, %s3, %s0 ; CHECK-NEXT: or %s0, 0, %s4 diff --git a/llvm/test/CodeGen/VE/Scalar/select_cc.ll b/llvm/test/CodeGen/VE/Scalar/select_cc.ll index 86c17bc..d8ea7f1 100644 --- a/llvm/test/CodeGen/VE/Scalar/select_cc.ll +++ b/llvm/test/CodeGen/VE/Scalar/select_cc.ll @@ -1326,7 +1326,6 @@ define i64 @select_cc_i1_i64(i1 zeroext %0, i1 zeroext %1, i64 %2, i64 %3) { ; CHECK-LABEL: select_cc_i1_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xor %s0, %s0, %s1 -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, %s3, %s0 ; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: b.l.t (, %s10) @@ -1515,7 +1514,6 @@ define i64 @select_cc_i1_u64(i1 zeroext %0, i1 zeroext %1, i64 %2, i64 %3) { ; CHECK-LABEL: select_cc_i1_u64: ; CHECK: # %bb.0: ; CHECK-NEXT: xor %s0, %s0, %s1 -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, 
%s3, %s0 ; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: b.l.t (, %s10) @@ -1704,7 +1702,6 @@ define i128 @select_cc_i1_i128(i1 zeroext %0, i1 zeroext %1, i128 %2, i128 %3) { ; CHECK-LABEL: select_cc_i1_i128: ; CHECK: # %bb.0: ; CHECK-NEXT: xor %s0, %s0, %s1 -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, %s4, %s0 ; CHECK-NEXT: cmov.w.ne %s3, %s5, %s0 ; CHECK-NEXT: or %s0, 0, %s2 @@ -1921,7 +1918,6 @@ define i128 @select_cc_i1_u128(i1 zeroext %0, i1 zeroext %1, i128 %2, i128 %3) { ; CHECK-LABEL: select_cc_i1_u128: ; CHECK: # %bb.0: ; CHECK-NEXT: xor %s0, %s0, %s1 -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, %s4, %s0 ; CHECK-NEXT: cmov.w.ne %s3, %s5, %s0 ; CHECK-NEXT: or %s0, 0, %s2 @@ -2138,7 +2134,6 @@ define float @select_cc_i1_float(i1 zeroext %0, i1 zeroext %1, float %2, float % ; CHECK-LABEL: select_cc_i1_float: ; CHECK: # %bb.0: ; CHECK-NEXT: xor %s0, %s0, %s1 -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, %s3, %s0 ; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: b.l.t (, %s10) @@ -2327,7 +2322,6 @@ define double @select_cc_i1_double(i1 zeroext %0, i1 zeroext %1, double %2, doub ; CHECK-LABEL: select_cc_i1_double: ; CHECK: # %bb.0: ; CHECK-NEXT: xor %s0, %s0, %s1 -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, %s3, %s0 ; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: b.l.t (, %s10) @@ -2516,7 +2510,6 @@ define fp128 @select_cc_i1_quad(i1 zeroext %0, i1 zeroext %1, fp128 %2, fp128 %3 ; CHECK-LABEL: select_cc_i1_quad: ; CHECK: # %bb.0: ; CHECK-NEXT: xor %s0, %s0, %s1 -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cmov.w.ne %s2, %s4, %s0 ; CHECK-NEXT: cmov.w.ne %s3, %s5, %s0 ; CHECK-NEXT: or %s0, 0, %s2 -- 2.7.4