}
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
+#define MAKE_CASE(V) \
+ case V: \
+ return #V;
switch ((AArch64ISD::NodeType)Opcode) {
- case AArch64ISD::FIRST_NUMBER: break;
- case AArch64ISD::CALL: return "AArch64ISD::CALL";
- case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
- case AArch64ISD::ADR: return "AArch64ISD::ADR";
- case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
- case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
- case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
- case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
- case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
- case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
- case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
- case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
- case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
- case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
- case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
- case AArch64ISD::ADD_PRED: return "AArch64ISD::ADD_PRED";
- case AArch64ISD::SDIV_PRED: return "AArch64ISD::SDIV_PRED";
- case AArch64ISD::UDIV_PRED: return "AArch64ISD::UDIV_PRED";
- case AArch64ISD::SMIN_PRED: return "AArch64ISD::SMIN_PRED";
- case AArch64ISD::UMIN_PRED: return "AArch64ISD::UMIN_PRED";
- case AArch64ISD::SMAX_PRED: return "AArch64ISD::SMAX_PRED";
- case AArch64ISD::UMAX_PRED: return "AArch64ISD::UMAX_PRED";
- case AArch64ISD::SHL_PRED: return "AArch64ISD::SHL_PRED";
- case AArch64ISD::SRL_PRED: return "AArch64ISD::SRL_PRED";
- case AArch64ISD::SRA_PRED: return "AArch64ISD::SRA_PRED";
- case AArch64ISD::SETCC_PRED: return "AArch64ISD::SETCC_PRED";
- case AArch64ISD::ADC: return "AArch64ISD::ADC";
- case AArch64ISD::SBC: return "AArch64ISD::SBC";
- case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
- case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
- case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
- case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
- case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
- case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
- case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
- case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
- case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
- case AArch64ISD::STRICT_FCMP: return "AArch64ISD::STRICT_FCMP";
- case AArch64ISD::STRICT_FCMPE: return "AArch64ISD::STRICT_FCMPE";
- case AArch64ISD::DUP: return "AArch64ISD::DUP";
- case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
- case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
- case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
- case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
- case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
- case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
- case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
- case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
- case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
- case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
- case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
- case AArch64ISD::BICi: return "AArch64ISD::BICi";
- case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
- case AArch64ISD::BSP: return "AArch64ISD::BSP";
- case AArch64ISD::NEG: return "AArch64ISD::NEG";
- case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
- case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
- case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
- case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
- case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
- case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
- case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
- case AArch64ISD::REV16: return "AArch64ISD::REV16";
- case AArch64ISD::REV32: return "AArch64ISD::REV32";
- case AArch64ISD::REV64: return "AArch64ISD::REV64";
- case AArch64ISD::EXT: return "AArch64ISD::EXT";
- case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
- case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
- case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
- case AArch64ISD::VSLI: return "AArch64ISD::VSLI";
- case AArch64ISD::VSRI: return "AArch64ISD::VSRI";
- case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
- case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
- case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
- case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
- case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
- case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
- case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
- case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
- case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
- case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
- case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
- case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
- case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
- case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
- case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
- case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
- case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
- case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
- case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
- case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
- case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
- case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
- case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
- case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
- case AArch64ISD::SMAXV_PRED: return "AArch64ISD::SMAXV_PRED";
- case AArch64ISD::UMAXV_PRED: return "AArch64ISD::UMAXV_PRED";
- case AArch64ISD::SMINV_PRED: return "AArch64ISD::SMINV_PRED";
- case AArch64ISD::UMINV_PRED: return "AArch64ISD::UMINV_PRED";
- case AArch64ISD::ORV_PRED: return "AArch64ISD::ORV_PRED";
- case AArch64ISD::EORV_PRED: return "AArch64ISD::EORV_PRED";
- case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED";
- case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N";
- case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N";
- case AArch64ISD::LASTA: return "AArch64ISD::LASTA";
- case AArch64ISD::LASTB: return "AArch64ISD::LASTB";
- case AArch64ISD::REV: return "AArch64ISD::REV";
- case AArch64ISD::REINTERPRET_CAST: return "AArch64ISD::REINTERPRET_CAST";
- case AArch64ISD::TBL: return "AArch64ISD::TBL";
- case AArch64ISD::FADD_PRED: return "AArch64ISD::FADD_PRED";
- case AArch64ISD::FADDA_PRED: return "AArch64ISD::FADDA_PRED";
- case AArch64ISD::FADDV_PRED: return "AArch64ISD::FADDV_PRED";
- case AArch64ISD::FMAXV_PRED: return "AArch64ISD::FMAXV_PRED";
- case AArch64ISD::FMAXNMV_PRED: return "AArch64ISD::FMAXNMV_PRED";
- case AArch64ISD::FMINV_PRED: return "AArch64ISD::FMINV_PRED";
- case AArch64ISD::FMINNMV_PRED: return "AArch64ISD::FMINNMV_PRED";
- case AArch64ISD::NOT: return "AArch64ISD::NOT";
- case AArch64ISD::BIT: return "AArch64ISD::BIT";
- case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
- case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
- case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
- case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
- case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
- case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
- case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
- case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
- case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
- case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
- case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
- case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
- case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
- case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
- case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
- case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
- case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
- case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
- case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
- case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
- case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
- case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
- case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
- case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
- case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
- case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
- case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
- case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
- case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
- case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
- case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
- case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
- case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
- case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
- case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
- case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
- case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
- case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
- case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
- case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
- case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
- case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
- case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
- case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
- case AArch64ISD::STG: return "AArch64ISD::STG";
- case AArch64ISD::STZG: return "AArch64ISD::STZG";
- case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
- case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
- case AArch64ISD::SUNPKHI: return "AArch64ISD::SUNPKHI";
- case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO";
- case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
- case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
- case AArch64ISD::INSR: return "AArch64ISD::INSR";
- case AArch64ISD::PTEST: return "AArch64ISD::PTEST";
- case AArch64ISD::PTRUE: return "AArch64ISD::PTRUE";
- case AArch64ISD::LD1: return "AArch64ISD::LD1";
- case AArch64ISD::LD1S: return "AArch64ISD::LD1S";
- case AArch64ISD::LDNF1: return "AArch64ISD::LDNF1";
- case AArch64ISD::LDNF1S: return "AArch64ISD::LDNF1S";
- case AArch64ISD::LDFF1: return "AArch64ISD::LDFF1";
- case AArch64ISD::LDFF1S: return "AArch64ISD::LDFF1S";
- case AArch64ISD::LD1RQ: return "AArch64ISD::LD1RQ";
- case AArch64ISD::LD1RO: return "AArch64ISD::LD1RO";
- case AArch64ISD::SVE_LD2: return "AArch64ISD::SVE_LD2";
- case AArch64ISD::SVE_LD3: return "AArch64ISD::SVE_LD3";
- case AArch64ISD::SVE_LD4: return "AArch64ISD::SVE_LD4";
- case AArch64ISD::GLD1: return "AArch64ISD::GLD1";
- case AArch64ISD::GLD1_SCALED: return "AArch64ISD::GLD1_SCALED";
- case AArch64ISD::GLD1_SXTW: return "AArch64ISD::GLD1_SXTW";
- case AArch64ISD::GLD1_UXTW: return "AArch64ISD::GLD1_UXTW";
- case AArch64ISD::GLD1_SXTW_SCALED: return "AArch64ISD::GLD1_SXTW_SCALED";
- case AArch64ISD::GLD1_UXTW_SCALED: return "AArch64ISD::GLD1_UXTW_SCALED";
- case AArch64ISD::GLD1_IMM: return "AArch64ISD::GLD1_IMM";
- case AArch64ISD::GLD1S: return "AArch64ISD::GLD1S";
- case AArch64ISD::GLD1S_SCALED: return "AArch64ISD::GLD1S_SCALED";
- case AArch64ISD::GLD1S_SXTW: return "AArch64ISD::GLD1S_SXTW";
- case AArch64ISD::GLD1S_UXTW: return "AArch64ISD::GLD1S_UXTW";
- case AArch64ISD::GLD1S_SXTW_SCALED: return "AArch64ISD::GLD1S_SXTW_SCALED";
- case AArch64ISD::GLD1S_UXTW_SCALED: return "AArch64ISD::GLD1S_UXTW_SCALED";
- case AArch64ISD::GLD1S_IMM: return "AArch64ISD::GLD1S_IMM";
- case AArch64ISD::GLDFF1: return "AArch64ISD::GLDFF1";
- case AArch64ISD::GLDFF1_SCALED: return "AArch64ISD::GLDFF1_SCALED";
- case AArch64ISD::GLDFF1_SXTW: return "AArch64ISD::GLDFF1_SXTW";
- case AArch64ISD::GLDFF1_UXTW: return "AArch64ISD::GLDFF1_UXTW";
- case AArch64ISD::GLDFF1_SXTW_SCALED:return "AArch64ISD::GLDFF1_SXTW_SCALED";
- case AArch64ISD::GLDFF1_UXTW_SCALED:return "AArch64ISD::GLDFF1_UXTW_SCALED";
- case AArch64ISD::GLDFF1_IMM: return "AArch64ISD::GLDFF1_IMM";
- case AArch64ISD::GLDFF1S: return "AArch64ISD::GLDFF1S";
- case AArch64ISD::GLDFF1S_SCALED: return "AArch64ISD::GLDFF1S_SCALED";
- case AArch64ISD::GLDFF1S_SXTW: return "AArch64ISD::GLDFF1S_SXTW";
- case AArch64ISD::GLDFF1S_UXTW: return "AArch64ISD::GLDFF1S_UXTW";
- case AArch64ISD::GLDFF1S_SXTW_SCALED:
- return "AArch64ISD::GLDFF1S_SXTW_SCALED";
- case AArch64ISD::GLDFF1S_UXTW_SCALED:
- return "AArch64ISD::GLDFF1S_UXTW_SCALED";
- case AArch64ISD::GLDFF1S_IMM: return "AArch64ISD::GLDFF1S_IMM";
-
- case AArch64ISD::GLDNT1: return "AArch64ISD::GLDNT1";
- case AArch64ISD::GLDNT1_INDEX: return "AArch64ISD::GLDNT1_INDEX";
- case AArch64ISD::GLDNT1S: return "AArch64ISD::GLDNT1S";
-
- case AArch64ISD::ST1: return "AArch64ISD::ST1";
-
- case AArch64ISD::SST1: return "AArch64ISD::SST1";
- case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";
- case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";
- case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";
- case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
- case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
- case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";
-
- case AArch64ISD::SSTNT1: return "AArch64ISD::SSTNT1";
- case AArch64ISD::SSTNT1_INDEX: return "AArch64ISD::SSTNT1_INDEX";
-
- case AArch64ISD::LDP: return "AArch64ISD::LDP";
- case AArch64ISD::STP: return "AArch64ISD::STP";
- case AArch64ISD::STNP: return "AArch64ISD::STNP";
- case AArch64ISD::DUP_PRED: return "AArch64ISD::DUP_PRED";
- case AArch64ISD::INDEX_VECTOR: return "AArch64ISD::INDEX_VECTOR";
- }
+ case AArch64ISD::FIRST_NUMBER:
+ break;
+ MAKE_CASE(AArch64ISD::CALL)
+ MAKE_CASE(AArch64ISD::ADRP)
+ MAKE_CASE(AArch64ISD::ADR)
+ MAKE_CASE(AArch64ISD::ADDlow)
+ MAKE_CASE(AArch64ISD::LOADgot)
+ MAKE_CASE(AArch64ISD::RET_FLAG)
+ MAKE_CASE(AArch64ISD::BRCOND)
+ MAKE_CASE(AArch64ISD::CSEL)
+ MAKE_CASE(AArch64ISD::FCSEL)
+ MAKE_CASE(AArch64ISD::CSINV)
+ MAKE_CASE(AArch64ISD::CSNEG)
+ MAKE_CASE(AArch64ISD::CSINC)
+ MAKE_CASE(AArch64ISD::THREAD_POINTER)
+ MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
+ MAKE_CASE(AArch64ISD::ADD_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SDIV_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::UDIV_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::UMAX_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SHL_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SRL_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SRA_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::ADC)
+ MAKE_CASE(AArch64ISD::SBC)
+ MAKE_CASE(AArch64ISD::ADDS)
+ MAKE_CASE(AArch64ISD::SUBS)
+ MAKE_CASE(AArch64ISD::ADCS)
+ MAKE_CASE(AArch64ISD::SBCS)
+ MAKE_CASE(AArch64ISD::ANDS)
+ MAKE_CASE(AArch64ISD::CCMP)
+ MAKE_CASE(AArch64ISD::CCMN)
+ MAKE_CASE(AArch64ISD::FCCMP)
+ MAKE_CASE(AArch64ISD::FCMP)
+ MAKE_CASE(AArch64ISD::STRICT_FCMP)
+ MAKE_CASE(AArch64ISD::STRICT_FCMPE)
+ MAKE_CASE(AArch64ISD::DUP)
+ MAKE_CASE(AArch64ISD::DUPLANE8)
+ MAKE_CASE(AArch64ISD::DUPLANE16)
+ MAKE_CASE(AArch64ISD::DUPLANE32)
+ MAKE_CASE(AArch64ISD::DUPLANE64)
+ MAKE_CASE(AArch64ISD::MOVI)
+ MAKE_CASE(AArch64ISD::MOVIshift)
+ MAKE_CASE(AArch64ISD::MOVIedit)
+ MAKE_CASE(AArch64ISD::MOVImsl)
+ MAKE_CASE(AArch64ISD::FMOV)
+ MAKE_CASE(AArch64ISD::MVNIshift)
+ MAKE_CASE(AArch64ISD::MVNImsl)
+ MAKE_CASE(AArch64ISD::BICi)
+ MAKE_CASE(AArch64ISD::ORRi)
+ MAKE_CASE(AArch64ISD::BSP)
+ MAKE_CASE(AArch64ISD::NEG)
+ MAKE_CASE(AArch64ISD::EXTR)
+ MAKE_CASE(AArch64ISD::ZIP1)
+ MAKE_CASE(AArch64ISD::ZIP2)
+ MAKE_CASE(AArch64ISD::UZP1)
+ MAKE_CASE(AArch64ISD::UZP2)
+ MAKE_CASE(AArch64ISD::TRN1)
+ MAKE_CASE(AArch64ISD::TRN2)
+ MAKE_CASE(AArch64ISD::REV16)
+ MAKE_CASE(AArch64ISD::REV32)
+ MAKE_CASE(AArch64ISD::REV64)
+ MAKE_CASE(AArch64ISD::EXT)
+ MAKE_CASE(AArch64ISD::VSHL)
+ MAKE_CASE(AArch64ISD::VLSHR)
+ MAKE_CASE(AArch64ISD::VASHR)
+ MAKE_CASE(AArch64ISD::VSLI)
+ MAKE_CASE(AArch64ISD::VSRI)
+ MAKE_CASE(AArch64ISD::CMEQ)
+ MAKE_CASE(AArch64ISD::CMGE)
+ MAKE_CASE(AArch64ISD::CMGT)
+ MAKE_CASE(AArch64ISD::CMHI)
+ MAKE_CASE(AArch64ISD::CMHS)
+ MAKE_CASE(AArch64ISD::FCMEQ)
+ MAKE_CASE(AArch64ISD::FCMGE)
+ MAKE_CASE(AArch64ISD::FCMGT)
+ MAKE_CASE(AArch64ISD::CMEQz)
+ MAKE_CASE(AArch64ISD::CMGEz)
+ MAKE_CASE(AArch64ISD::CMGTz)
+ MAKE_CASE(AArch64ISD::CMLEz)
+ MAKE_CASE(AArch64ISD::CMLTz)
+ MAKE_CASE(AArch64ISD::FCMEQz)
+ MAKE_CASE(AArch64ISD::FCMGEz)
+ MAKE_CASE(AArch64ISD::FCMGTz)
+ MAKE_CASE(AArch64ISD::FCMLEz)
+ MAKE_CASE(AArch64ISD::FCMLTz)
+ MAKE_CASE(AArch64ISD::SADDV)
+ MAKE_CASE(AArch64ISD::UADDV)
+ MAKE_CASE(AArch64ISD::SMINV)
+ MAKE_CASE(AArch64ISD::UMINV)
+ MAKE_CASE(AArch64ISD::SMAXV)
+ MAKE_CASE(AArch64ISD::UMAXV)
+ MAKE_CASE(AArch64ISD::SMAXV_PRED)
+ MAKE_CASE(AArch64ISD::UMAXV_PRED)
+ MAKE_CASE(AArch64ISD::SMINV_PRED)
+ MAKE_CASE(AArch64ISD::UMINV_PRED)
+ MAKE_CASE(AArch64ISD::ORV_PRED)
+ MAKE_CASE(AArch64ISD::EORV_PRED)
+ MAKE_CASE(AArch64ISD::ANDV_PRED)
+ MAKE_CASE(AArch64ISD::CLASTA_N)
+ MAKE_CASE(AArch64ISD::CLASTB_N)
+ MAKE_CASE(AArch64ISD::LASTA)
+ MAKE_CASE(AArch64ISD::LASTB)
+ MAKE_CASE(AArch64ISD::REV)
+ MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
+ MAKE_CASE(AArch64ISD::TBL)
+ MAKE_CASE(AArch64ISD::FADD_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::FADDA_PRED)
+ MAKE_CASE(AArch64ISD::FADDV_PRED)
+ MAKE_CASE(AArch64ISD::FMAXV_PRED)
+ MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
+ MAKE_CASE(AArch64ISD::FMINV_PRED)
+ MAKE_CASE(AArch64ISD::FMINNMV_PRED)
+ MAKE_CASE(AArch64ISD::NOT)
+ MAKE_CASE(AArch64ISD::BIT)
+ MAKE_CASE(AArch64ISD::CBZ)
+ MAKE_CASE(AArch64ISD::CBNZ)
+ MAKE_CASE(AArch64ISD::TBZ)
+ MAKE_CASE(AArch64ISD::TBNZ)
+ MAKE_CASE(AArch64ISD::TC_RETURN)
+ MAKE_CASE(AArch64ISD::PREFETCH)
+ MAKE_CASE(AArch64ISD::SITOF)
+ MAKE_CASE(AArch64ISD::UITOF)
+ MAKE_CASE(AArch64ISD::NVCAST)
+ MAKE_CASE(AArch64ISD::SQSHL_I)
+ MAKE_CASE(AArch64ISD::UQSHL_I)
+ MAKE_CASE(AArch64ISD::SRSHR_I)
+ MAKE_CASE(AArch64ISD::URSHR_I)
+ MAKE_CASE(AArch64ISD::SQSHLU_I)
+ MAKE_CASE(AArch64ISD::WrapperLarge)
+ MAKE_CASE(AArch64ISD::LD2post)
+ MAKE_CASE(AArch64ISD::LD3post)
+ MAKE_CASE(AArch64ISD::LD4post)
+ MAKE_CASE(AArch64ISD::ST2post)
+ MAKE_CASE(AArch64ISD::ST3post)
+ MAKE_CASE(AArch64ISD::ST4post)
+ MAKE_CASE(AArch64ISD::LD1x2post)
+ MAKE_CASE(AArch64ISD::LD1x3post)
+ MAKE_CASE(AArch64ISD::LD1x4post)
+ MAKE_CASE(AArch64ISD::ST1x2post)
+ MAKE_CASE(AArch64ISD::ST1x3post)
+ MAKE_CASE(AArch64ISD::ST1x4post)
+ MAKE_CASE(AArch64ISD::LD1DUPpost)
+ MAKE_CASE(AArch64ISD::LD2DUPpost)
+ MAKE_CASE(AArch64ISD::LD3DUPpost)
+ MAKE_CASE(AArch64ISD::LD4DUPpost)
+ MAKE_CASE(AArch64ISD::LD1LANEpost)
+ MAKE_CASE(AArch64ISD::LD2LANEpost)
+ MAKE_CASE(AArch64ISD::LD3LANEpost)
+ MAKE_CASE(AArch64ISD::LD4LANEpost)
+ MAKE_CASE(AArch64ISD::ST2LANEpost)
+ MAKE_CASE(AArch64ISD::ST3LANEpost)
+ MAKE_CASE(AArch64ISD::ST4LANEpost)
+ MAKE_CASE(AArch64ISD::SMULL)
+ MAKE_CASE(AArch64ISD::UMULL)
+ MAKE_CASE(AArch64ISD::FRECPE)
+ MAKE_CASE(AArch64ISD::FRECPS)
+ MAKE_CASE(AArch64ISD::FRSQRTE)
+ MAKE_CASE(AArch64ISD::FRSQRTS)
+ MAKE_CASE(AArch64ISD::STG)
+ MAKE_CASE(AArch64ISD::STZG)
+ MAKE_CASE(AArch64ISD::ST2G)
+ MAKE_CASE(AArch64ISD::STZ2G)
+ MAKE_CASE(AArch64ISD::SUNPKHI)
+ MAKE_CASE(AArch64ISD::SUNPKLO)
+ MAKE_CASE(AArch64ISD::UUNPKHI)
+ MAKE_CASE(AArch64ISD::UUNPKLO)
+ MAKE_CASE(AArch64ISD::INSR)
+ MAKE_CASE(AArch64ISD::PTEST)
+ MAKE_CASE(AArch64ISD::PTRUE)
+ MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::ST1_PRED)
+ MAKE_CASE(AArch64ISD::SST1_PRED)
+ MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
+ MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
+ MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
+ MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
+ MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
+ MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
+ MAKE_CASE(AArch64ISD::SSTNT1_PRED)
+ MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
+ MAKE_CASE(AArch64ISD::LDP)
+ MAKE_CASE(AArch64ISD::STP)
+ MAKE_CASE(AArch64ISD::STNP)
+ MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::INDEX_VECTOR)
+ }
+#undef MAKE_CASE
return nullptr;
}
return LowerXALUO(Op, DAG);
case ISD::FADD:
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_MERGE_OP1);
return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
case ISD::FSUB:
return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::SDIV:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_MERGE_OP1);
case ISD::UDIV:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_MERGE_OP1);
case ISD::SMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1);
case ISD::UMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_MERGE_OP1);
case ISD::SMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_MERGE_OP1);
case ISD::UMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_MERGE_OP1);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
llvm_unreachable("Unexpected request to lower ISD::LOAD");
case ISD::ADD:
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_MERGE_OP1);
llvm_unreachable("Unexpected request to lower ISD::ADD");
}
}
case ISD::SHL:
if (VT.isScalableVector())
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_MERGE_OP1);
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
case ISD::SRA:
case ISD::SRL:
if (VT.isScalableVector()) {
- unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
- : AArch64ISD::SRL_PRED;
+ unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_MERGE_OP1
+ : AArch64ISD::SRL_MERGE_OP1;
return LowerToPredicatedOp(Op, DAG, Opc);
}
if (Op.getValueType().isScalableVector()) {
if (Op.getOperand(0).getValueType().isFloatingPoint())
return Op;
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
}
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
unsigned N, Opcode;
static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
- {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2}},
- {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3}},
- {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4}}};
+ {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
+ {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
+ {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
assert(VT.getVectorElementCount().Min % N == 0 &&
// SVE load instructions perform an implicit zero-extend, which makes them
// perfect candidates for combining.
switch (Opc) {
- case AArch64ISD::LD1:
- case AArch64ISD::LDNF1:
- case AArch64ISD::LDFF1:
+ case AArch64ISD::LD1_MERGE_ZERO:
+ case AArch64ISD::LDNF1_MERGE_ZERO:
+ case AArch64ISD::LDFF1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
break;
- case AArch64ISD::GLD1:
- case AArch64ISD::GLD1_SCALED:
- case AArch64ISD::GLD1_SXTW:
- case AArch64ISD::GLD1_SXTW_SCALED:
- case AArch64ISD::GLD1_UXTW:
- case AArch64ISD::GLD1_UXTW_SCALED:
- case AArch64ISD::GLD1_IMM:
- case AArch64ISD::GLDFF1:
- case AArch64ISD::GLDFF1_SCALED:
- case AArch64ISD::GLDFF1_SXTW:
- case AArch64ISD::GLDFF1_SXTW_SCALED:
- case AArch64ISD::GLDFF1_UXTW:
- case AArch64ISD::GLDFF1_UXTW_SCALED:
- case AArch64ISD::GLDFF1_IMM:
- case AArch64ISD::GLDNT1:
+ case AArch64ISD::GLD1_MERGE_ZERO:
+ case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
+ case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
+ case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLD1_IMM_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
+ case AArch64ISD::GLDNT1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
break;
default:
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
- return DAG.getNode(AArch64ISD::DUP_PRED, dl, N->getValueType(0),
- N->getOperand(1), N->getOperand(2), Scalar);
+ SDValue Passthru = N->getOperand(1);
+ SDValue Pred = N->getOperand(2);
+ return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
+ Pred, Scalar, Passthru);
}
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
return SDValue();
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
- return DAG.getNode(AArch64ISD::SETCC_PRED, DL, VT, Pred, N->getOperand(2),
- Splat, DAG.getCondCode(CC));
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
+ N->getOperand(2), Splat, DAG.getCondCode(CC));
}
return SDValue();
case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);
case Intrinsic::aarch64_sve_sdiv:
- return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(AArch64ISD::SDIV_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_udiv:
- return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(AArch64ISD::UDIV_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_smin:
- return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(AArch64ISD::SMIN_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_umin:
- return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(AArch64ISD::UMIN_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_smax:
- return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(AArch64ISD::SMAX_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_umax:
- return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(AArch64ISD::UMAX_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_lsl:
- return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(AArch64ISD::SHL_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_lsr:
- return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(AArch64ISD::SRL_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_asr:
- return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(AArch64ISD::SRA_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
- return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(ISD::SETUGE));
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
break;
case Intrinsic::aarch64_sve_cmphi:
if (!N->getOperand(2).getValueType().isFloatingPoint())
- return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(ISD::SETUGT));
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
break;
case Intrinsic::aarch64_sve_cmpge:
if (!N->getOperand(2).getValueType().isFloatingPoint())
- return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(ISD::SETGE));
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETGE));
break;
case Intrinsic::aarch64_sve_cmpgt:
if (!N->getOperand(2).getValueType().isFloatingPoint())
- return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(ISD::SETGT));
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETGT));
break;
case Intrinsic::aarch64_sve_cmpeq:
if (!N->getOperand(2).getValueType().isFloatingPoint())
- return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(ISD::SETEQ));
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
break;
case Intrinsic::aarch64_sve_cmpne:
if (!N->getOperand(2).getValueType().isFloatingPoint())
- return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(ISD::SETNE));
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETNE));
break;
case Intrinsic::aarch64_sve_fadda:
return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
template <unsigned Opcode>
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
- static_assert(Opcode == AArch64ISD::LD1RQ || Opcode == AArch64ISD::LD1RO,
+ static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
+ Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
"Unsupported opcode.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
InputVT
};
- return DAG.getNode(AArch64ISD::ST1, DL, N->getValueType(0), Ops);
+ return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
}
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal scatters because there's no instruction that takes
// indicies.
- if (Opcode == AArch64ISD::SSTNT1_INDEX) {
+ if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
Offset =
getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
- Opcode = AArch64ISD::SSTNT1;
+ Opcode = AArch64ISD::SSTNT1_PRED;
}
// In the case of non-temporal gather loads there's only one SVE instruction
// * stnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
- if (Opcode == AArch64ISD::SSTNT1 && Offset.getValueType().isVector())
- std::swap(Base, Offset);
+ if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
+ std::swap(Base, Offset);
// SST1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// where #SizeInBytes is the size in bytes of the stored items. For
// immediates outside that range and non-immediate scalar offsets use SST1 or
// SST1_UXTW instead.
- if (Opcode == AArch64ISD::SST1_IMM) {
+ if (Opcode == AArch64ISD::SST1_IMM_PRED) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
SrcVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
- Opcode = AArch64ISD::SST1_UXTW;
+ Opcode = AArch64ISD::SST1_UXTW_PRED;
else
- Opcode = AArch64ISD::SST1;
+ Opcode = AArch64ISD::SST1_PRED;
std::swap(Base, Offset);
}
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal gathers because there's no instruction that takes
// indicies.
- if (Opcode == AArch64ISD::GLDNT1_INDEX) {
+ if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
RetVT.getScalarSizeInBits());
- Opcode = AArch64ISD::GLDNT1;
+ Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
}
// In the case of non-temporal gather loads there's only one SVE instruction
// * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
- if (Opcode == AArch64ISD::GLDNT1 && Offset.getValueType().isVector())
- std::swap(Base, Offset);
+ if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
+ Offset.getValueType().isVector())
+ std::swap(Base, Offset);
// GLD{FF}1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the loaded items. For
- // immediates outside that range and non-immediate scalar offsets use GLD1 or
- // GLD1_UXTW instead.
- if (Opcode == AArch64ISD::GLD1_IMM || Opcode == AArch64ISD::GLDFF1_IMM) {
+ // immediates outside that range and non-immediate scalar offsets use
+ // GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
+ if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
+ Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
RetVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
- Opcode = (Opcode == AArch64ISD::GLD1_IMM) ? AArch64ISD::GLD1_UXTW
- : AArch64ISD::GLDFF1_UXTW;
+ Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
+ ? AArch64ISD::GLD1_UXTW_MERGE_ZERO
+ : AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
else
- Opcode = (Opcode == AArch64ISD::GLD1_IMM) ? AArch64ISD::GLD1
- : AArch64ISD::GLDFF1;
+ Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
+ ? AArch64ISD::GLD1_MERGE_ZERO
+ : AArch64ISD::GLDFF1_MERGE_ZERO;
std::swap(Base, Offset);
}
unsigned NewOpc;
unsigned MemVTOpNum = 4;
switch (Opc) {
- case AArch64ISD::LD1:
- NewOpc = AArch64ISD::LD1S;
+ case AArch64ISD::LD1_MERGE_ZERO:
+ NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
- case AArch64ISD::LDNF1:
- NewOpc = AArch64ISD::LDNF1S;
+ case AArch64ISD::LDNF1_MERGE_ZERO:
+ NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
- case AArch64ISD::LDFF1:
- NewOpc = AArch64ISD::LDFF1S;
+ case AArch64ISD::LDFF1_MERGE_ZERO:
+ NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
- case AArch64ISD::GLD1:
- NewOpc = AArch64ISD::GLD1S;
+ case AArch64ISD::GLD1_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
break;
- case AArch64ISD::GLD1_SCALED:
- NewOpc = AArch64ISD::GLD1S_SCALED;
+ case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
break;
- case AArch64ISD::GLD1_SXTW:
- NewOpc = AArch64ISD::GLD1S_SXTW;
+ case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
break;
- case AArch64ISD::GLD1_SXTW_SCALED:
- NewOpc = AArch64ISD::GLD1S_SXTW_SCALED;
+ case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
break;
- case AArch64ISD::GLD1_UXTW:
- NewOpc = AArch64ISD::GLD1S_UXTW;
+ case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
break;
- case AArch64ISD::GLD1_UXTW_SCALED:
- NewOpc = AArch64ISD::GLD1S_UXTW_SCALED;
+ case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
break;
- case AArch64ISD::GLD1_IMM:
- NewOpc = AArch64ISD::GLD1S_IMM;
+ case AArch64ISD::GLD1_IMM_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
break;
- case AArch64ISD::GLDFF1:
- NewOpc = AArch64ISD::GLDFF1S;
+ case AArch64ISD::GLDFF1_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
break;
- case AArch64ISD::GLDFF1_SCALED:
- NewOpc = AArch64ISD::GLDFF1S_SCALED;
+ case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
break;
- case AArch64ISD::GLDFF1_SXTW:
- NewOpc = AArch64ISD::GLDFF1S_SXTW;
+ case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
break;
- case AArch64ISD::GLDFF1_SXTW_SCALED:
- NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED;
+ case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
break;
- case AArch64ISD::GLDFF1_UXTW:
- NewOpc = AArch64ISD::GLDFF1S_UXTW;
+ case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
break;
- case AArch64ISD::GLDFF1_UXTW_SCALED:
- NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED;
+ case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
break;
- case AArch64ISD::GLDFF1_IMM:
- NewOpc = AArch64ISD::GLDFF1S_IMM;
+ case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
break;
- case AArch64ISD::GLDNT1:
- NewOpc = AArch64ISD::GLDNT1S;
+ case AArch64ISD::GLDNT1_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
break;
default:
return SDValue();
case Intrinsic::aarch64_sve_ldnt1:
return performLDNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_ld1rq:
- return performLD1ReplicateCombine<AArch64ISD::LD1RQ>(N, DAG);
+ return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ld1ro:
- return performLD1ReplicateCombine<AArch64ISD::LD1RO>(N, DAG);
+ return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1);
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1);
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_index:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_INDEX);
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1);
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1:
- return performLD1Combine(N, DAG, AArch64ISD::LD1);
+ return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnf1:
- return performLD1Combine(N, DAG, AArch64ISD::LDNF1);
+ return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1:
- return performLD1Combine(N, DAG, AArch64ISD::LDFF1);
+ return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1:
return performST1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1:
return performSTNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
- return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1);
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
- return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1);
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter:
- return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1);
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_index:
- return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX);
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
case Intrinsic::aarch64_sve_ld1_gather:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1);
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_index:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SCALED);
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLD1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_sxtw:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW,
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW,
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED,
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED,
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM);
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1);
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_index:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_SCALED);
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_SXTW,
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_UXTW,
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_SXTW_SCALED,
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_UXTW_SCALED,
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
- return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_IMM);
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1_scatter:
- return performScatterStoreCombine(N, DAG, AArch64ISD::SST1);
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
case Intrinsic::aarch64_sve_st1_scatter_index:
- return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED);
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
- return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW,
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw:
- return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW,
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
- return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_SCALED,
+ return performScatterStoreCombine(N, DAG,
+ AArch64ISD::SST1_SXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
- return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED,
+ return performScatterStoreCombine(N, DAG,
+ AArch64ISD::SST1_UXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
- return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM);
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
case Intrinsic::aarch64_sve_tuple_get: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
//
//===----------------------------------------------------------------------===//
+// For predicated nodes where the entire operation is controlled by a governing
+// predicate, please stick to a similar naming convention as used for the
+// ISD nodes:
+//
+// SDNode <=> AArch64ISD
+// -------------------------------
+// _m<n> <=> _MERGE_OP<n>
+// _mt <=> _MERGE_PASSTHRU
+// _z <=> _MERGE_ZERO
+// _p <=> _PRED
+//
+// Given the context of this file, it is not strictly necessary to use _p to
+// distinguish predicated from unpredicated nodes given that most SVE
+// instructions are predicated.
+
// Contiguous loads - node definitions
//
def SDT_AArch64_LD1 : SDTypeProfile<1, 3, [
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
-def AArch64ld1 : SDNode<"AArch64ISD::LD1", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1s : SDNode<"AArch64ISD::LD1S", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1_z : SDNode<"AArch64ISD::LD1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1s_z : SDNode<"AArch64ISD::LD1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
// Non-faulting & first-faulting loads - node definitions
//
-def AArch64ldnf1 : SDNode<"AArch64ISD::LDNF1", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1 : SDNode<"AArch64ISD::LDFF1", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldnf1_z : SDNode<"AArch64ISD::LDNF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_z : SDNode<"AArch64ISD::LDFF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldnf1s : SDNode<"AArch64ISD::LDNF1S", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s : SDNode<"AArch64ISD::LDFF1S", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldnf1s_z : SDNode<"AArch64ISD::LDNF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_z : SDNode<"AArch64ISD::LDFF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
// Contiguous load and replicate - node definitions
//
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
-def AArch64ld1rq : SDNode<"AArch64ISD::LD1RQ", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1ro : SDNode<"AArch64ISD::LD1RO", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1rq_z : SDNode<"AArch64ISD::LD1RQ_MERGE_ZERO", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1ro_z : SDNode<"AArch64ISD::LD1RO_MERGE_ZERO", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>;
// Gather loads - node definitions
//
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
-def AArch64ld1_gather : SDNode<"AArch64ISD::GLD1", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1_gather_scaled : SDNode<"AArch64ISD::GLD1_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1_gather_uxtw : SDNode<"AArch64ISD::GLD1_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1_gather_sxtw : SDNode<"AArch64ISD::GLD1_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1_UXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1_gather_imm : SDNode<"AArch64ISD::GLD1_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
-
-def AArch64ld1s_gather : SDNode<"AArch64ISD::GLD1S", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1s_gather_scaled : SDNode<"AArch64ISD::GLD1S_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1s_gather_uxtw : SDNode<"AArch64ISD::GLD1S_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1s_gather_sxtw : SDNode<"AArch64ISD::GLD1S_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1s_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1s_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ld1s_gather_imm : SDNode<"AArch64ISD::GLD1S_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
-
-def AArch64ldff1_gather : SDNode<"AArch64ISD::GLDFF1", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_scaled : SDNode<"AArch64ISD::GLDFF1_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_uxtw : SDNode<"AArch64ISD::GLDFF1_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_sxtw : SDNode<"AArch64ISD::GLDFF1_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_uxtw_scaled : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_sxtw_scaled : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_imm : SDNode<"AArch64ISD::GLDFF1_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-
-def AArch64ldff1s_gather : SDNode<"AArch64ISD::GLDFF1S", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_scaled : SDNode<"AArch64ISD::GLDFF1S_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_uxtw : SDNode<"AArch64ISD::GLDFF1S_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_sxtw : SDNode<"AArch64ISD::GLDFF1S_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_uxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_sxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_imm : SDNode<"AArch64ISD::GLDFF1S_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-
-def AArch64ldnt1_gather : SDNode<"AArch64ISD::GLDNT1", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ldnt1s_gather : SDNode<"AArch64ISD::GLDNT1S", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_z : SDNode<"AArch64ISD::GLD1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_scaled_z : SDNode<"AArch64ISD::GLD1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_uxtw_z : SDNode<"AArch64ISD::GLD1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_sxtw_z : SDNode<"AArch64ISD::GLD1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_imm_z : SDNode<"AArch64ISD::GLD1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+
+def AArch64ld1s_gather_z : SDNode<"AArch64ISD::GLD1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_scaled_z : SDNode<"AArch64ISD::GLD1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_uxtw_z : SDNode<"AArch64ISD::GLD1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_sxtw_z : SDNode<"AArch64ISD::GLD1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_imm_z : SDNode<"AArch64ISD::GLD1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+
+def AArch64ldff1_gather_z : SDNode<"AArch64ISD::GLDFF1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_imm_z : SDNode<"AArch64ISD::GLDFF1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+
+def AArch64ldff1s_gather_z : SDNode<"AArch64ISD::GLDFF1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_imm_z : SDNode<"AArch64ISD::GLDFF1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+
+def AArch64ldnt1_gather_z : SDNode<"AArch64ISD::GLDNT1_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldnt1s_gather_z : SDNode<"AArch64ISD::GLDNT1S_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
// Contiguous stores - node definitions
//
SDTCVecEltisVT<2,i1>, SDTCisSameNumEltsAs<0,2>
]>;
-def AArch64st1 : SDNode<"AArch64ISD::ST1", SDT_AArch64_ST1, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1 : SDNode<"AArch64ISD::ST1_PRED", SDT_AArch64_ST1, [SDNPHasChain, SDNPMayStore]>;
// Scatter stores - node definitions
//
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
-def AArch64st1_scatter : SDNode<"AArch64ISD::SST1", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
-def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
-def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
-def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
-def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
-def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
-def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter : SDNode<"AArch64ISD::SST1_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;
-def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;
+def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;
// AArch64 SVE/SVE2 - the remaining node definitions
//
def sve_cntd_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -2>">;
def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>;
-def AArch64faddv_pred : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>;
-def AArch64fmaxv_pred : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>;
-def AArch64fmaxnmv_pred : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>;
-def AArch64fminv_pred : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>;
-def AArch64fminnmv_pred : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>;
-def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
-def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
-def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
-def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;
-def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;
-def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;
-def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;
-def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
-def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;
+def AArch64faddv_p : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>;
+def AArch64fmaxv_p : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>;
+def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>;
+def AArch64fminv_p : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>;
+def AArch64fminnmv_p : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>;
+def AArch64smaxv_p : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
+def AArch64umaxv_p : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
+def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
+def AArch64uminv_p : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;
+def AArch64orv_p : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;
+def AArch64eorv_p : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;
+def AArch64andv_p : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;
+def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
+def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;
def SDT_AArch64Arith : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>
]>;
-def AArch64add_pred : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
-def AArch64fadd_pred : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
-def AArch64sdiv_pred : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
-def AArch64udiv_pred : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
-def AArch64smin_pred : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
-def AArch64umin_pred : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
-def AArch64smax_pred : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
-def AArch64umax_pred : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
-def AArch64lsl_pred : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>;
-def AArch64lsr_pred : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>;
-def AArch64asr_pred : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>;
+// Merging op1 into the inactive lanes.
+def AArch64add_m1 : SDNode<"AArch64ISD::ADD_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64fadd_m1 : SDNode<"AArch64ISD::FADD_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64sdiv_m1 : SDNode<"AArch64ISD::SDIV_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64udiv_m1 : SDNode<"AArch64ISD::UDIV_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64umax_m1 : SDNode<"AArch64ISD::UMAX_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64lsl_m1 : SDNode<"AArch64ISD::SHL_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64lsr_m1 : SDNode<"AArch64ISD::SRL_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64asr_m1 : SDNode<"AArch64ISD::SRA_MERGE_OP1", SDT_AArch64Arith>;
def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
-def AArch64fadda_pred : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;
+def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;
def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
-def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVec<2>, SDTCVecEltisVT<2,i1>]>;
-def AArch64dup_pred : SDNode<"AArch64ISD::DUP_PRED", SDT_AArch64DUP_PRED>;
+def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>]>;
+def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED>;
def SDT_IndexVector : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<2>]>;
def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ", 1>;
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", 0>;
- defm ADD_ZPZZ : sve_int_bin_pred_zx<int_aarch64_sve_add, AArch64add_pred>;
+ defm ADD_ZPZZ : sve_int_bin_pred_zx<int_aarch64_sve_add, AArch64add_m1>;
defm SUB_ZPZZ : sve_int_bin_pred_zx<int_aarch64_sve_sub>;
defm SUBR_ZPZZ : sve_int_bin_pred_zx<int_aarch64_sve_subr>;
// SVE predicated integer reductions.
defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", int_aarch64_sve_saddv>;
defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", int_aarch64_sve_uaddv, int_aarch64_sve_saddv>;
- defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_pred>;
- defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_pred>;
- defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_pred>;
- defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", AArch64uminv_pred>;
- defm ORV_VPZ : sve_int_reduce_2<0b000, "orv", AArch64orv_pred>;
- defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", AArch64eorv_pred>;
- defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", AArch64andv_pred>;
+ defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_p>;
+ defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_p>;
+ defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_p>;
+ defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", AArch64uminv_p>;
+ defm ORV_VPZ : sve_int_reduce_2<0b000, "orv", AArch64orv_p>;
+ defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", AArch64eorv_p>;
+ defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", AArch64andv_p>;
defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn", or>;
defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;
- defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_pred>;
- defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_pred>;
- defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_pred>;
- defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_pred>;
+ defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_m1>;
+ defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_m1>;
+ defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_m1>;
+ defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_m1>;
defm MUL_ZI : sve_int_arith_imm2<"mul", mul>;
defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2),
(MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>;
- defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", AArch64sdiv_pred>;
- defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", AArch64udiv_pred>;
+ defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", AArch64sdiv_m1>;
+ defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", AArch64udiv_m1>;
defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>;
defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", int_aarch64_sve_udivr>;
defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>;
defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>;
- defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", AArch64smax_pred>;
- defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", AArch64umax_pred>;
- defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", AArch64smin_pred>;
- defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", AArch64umin_pred>;
+ defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", AArch64smax_m1>;
+ defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", AArch64umax_m1>;
+ defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", AArch64smin_m1>;
+ defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", AArch64umin_m1>;
defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>;
defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>;
defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", int_aarch64_sve_fdivr, DestructiveBinaryCommWithRev, "FDIV_ZPmZ", 0>;
defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ", 1>;
- defm FADD_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fadd, AArch64fadd_pred>;
+ defm FADD_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fadd, AArch64fadd_m1>;
defm FSUB_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsub>;
defm FMUL_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmul>;
defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsubr>;
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;
// SVE floating point reductions.
- defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_pred>;
- defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_pred>;
- defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_pred>;
- defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_pred>;
- defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_pred>;
- defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_pred>;
+ defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_p>;
+ defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_p>;
+ defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_p>;
+ defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_p>;
+ defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_p>;
+ defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_p>;
// Use more efficient NEON instructions to extract elements within the NEON
// part (first 128bits) of an SVE register.
defm DUP_ZZI : sve_int_perm_dup_i<"dup">;
// Splat scalar register (predicated)
- defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_pred>;
- defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_pred>;
+ defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_mt>;
+ defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>;
// Duplicate FP scalar into all vector elements
def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))),
// Gathers using unscaled 32-bit offsets, e.g.
// ld1h z0.s, p0/z, [x0, z0.s, uxtw]
- defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
- defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
- defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
- defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
- defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
- defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
- defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
- defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
- defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
- defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
+ defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
+ defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
// Gathers using scaled 32-bit offsets, e.g.
// ld1h z0.s, p0/z, [x0, z0.s, uxtw #1]
- defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
- defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled, AArch64ldff1s_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
- defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
- defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
- defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
- defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
+ defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
+ defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
// Gathers using 32-bit pointers with scaled offset, e.g.
// ld1h z0.s, p0/z, [z0.s, #16]
- defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv4i8>;
- defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm, nxv4i8>;
- defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv4i8>;
- defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm, nxv4i8>;
- defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv4i16>;
- defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm, nxv4i16>;
- defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv4i16>;
- defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm, nxv4i16>;
- defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv4i32>;
- defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm, nxv4i32>;
+ defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm_z, nxv4i8>;
+ defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm_z, nxv4i8>;
+ defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm_z, nxv4i8>;
+ defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm_z, nxv4i8>;
+ defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm_z, nxv4i16>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm_z, nxv4i16>;
+ defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm_z, nxv4i16>;
+ defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm_z, nxv4i16>;
+ defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm_z, nxv4i32>;
+ defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm_z, nxv4i32>;
// Gathers using 64-bit pointers with scaled offset, e.g.
// ld1h z0.d, p0/z, [z0.d, #16]
- defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv2i8>;
- defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm, nxv2i8>;
- defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv2i8>;
- defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm, nxv2i8>;
- defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv2i16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm, nxv2i16>;
- defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv2i16>;
- defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm, nxv2i16>;
- defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, AArch64ld1s_gather_imm, nxv2i32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, AArch64ldff1s_gather_imm, nxv2i32>;
- defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv2i32>;
- defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm, nxv2i32>;
- defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm, nxv2i64>;
- defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, AArch64ldff1_gather_imm, nxv2i64>;
+ defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm_z, nxv2i8>;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm_z, nxv2i8>;
+ defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm_z, nxv2i8>;
+ defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm_z, nxv2i8>;
+ defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm_z, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm_z, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm_z, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm_z, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, AArch64ld1s_gather_imm_z, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, AArch64ldff1s_gather_imm_z, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm_z, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm_z, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm_z, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, AArch64ldff1_gather_imm_z, nxv2i64>;
// Gathers using unscaled 64-bit offsets, e.g.
// ld1h z0.d, p0/z, [x0, z0.d]
- defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", AArch64ld1s_gather, nxv2i8>;
- defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather, nxv2i8>;
- defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather, nxv2i8>;
- defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", AArch64ldff1_gather, nxv2i8>;
- defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", AArch64ld1s_gather, nxv2i16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather, nxv2i16>;
- defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather, nxv2i16>;
- defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", AArch64ldff1_gather, nxv2i16>;
- defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", AArch64ld1s_gather, nxv2i32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather, nxv2i32>;
- defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather, nxv2i32>;
- defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", AArch64ldff1_gather, nxv2i32>;
- defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather, nxv2i64>;
- defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", AArch64ldff1_gather, nxv2i64>;
+ defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_z, nxv2i8>;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_z, nxv2i8>;
+ defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather_z, nxv2i8>;
+ defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_z, nxv2i8>;
+ defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_z, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_z, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather_z, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_z, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_z, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_z, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather_z, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_z, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather_z, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_z, nxv2i64>;
// Gathers using scaled 64-bit offsets, e.g.
// ld1h z0.d, p0/z, [x0, z0.d, lsl #1]
- defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", AArch64ld1s_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
- defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
- defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", AArch64ldff1_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
- defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", AArch64ld1s_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
- defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
- defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", AArch64ldff1_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
- defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled, ZPR64ExtLSL64, nxv2i64>;
- defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", AArch64ldff1_gather_scaled, ZPR64ExtLSL64, nxv2i64>;
+ defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", AArch64ld1s_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", AArch64ld1s_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled_z, ZPR64ExtLSL64, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL64, nxv2i64>;
// Gathers using unscaled 32-bit offsets unpacked in 64-bits elements, e.g.
// ld1h z0.d, p0/z, [x0, z0.d, uxtw]
- defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
- defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
- defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
- defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
- defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
- defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
- defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
- defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
- defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
- defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
- defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
- defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
+ defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
// Gathers using scaled 32-bit offsets unpacked in 64-bits elements, e.g.
// ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
- defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled, AArch64ldff1s_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
- defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
- defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
- defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_scaled, AArch64ldff1s_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
- defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
- defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
- defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
- defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
+ defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
// Non-temporal contiguous loads (register + immediate)
defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>;
defm INDEX_II : sve_int_index_ii<"index", index_vector>;
// Unpredicated shifts
- defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_pred>;
- defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_pred>;
- defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_pred>;
+ defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_m1>;
+ defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_m1>;
+ defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_m1>;
defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">;
defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">;
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
- defm ASR_ZPZZ : sve_int_bin_pred_zx<AArch64asr_pred>;
- defm LSR_ZPZZ : sve_int_bin_pred_zx<AArch64lsr_pred>;
- defm LSL_ZPZZ : sve_int_bin_pred_zx<AArch64lsl_pred>;
+ defm ASR_ZPZZ : sve_int_bin_pred_zx<AArch64asr_m1>;
+ defm LSR_ZPZZ : sve_int_bin_pred_zx<AArch64lsr_m1>;
+ defm LSL_ZPZZ : sve_int_bin_pred_zx<AArch64lsl_m1>;
defm ASRD_ZPZI : sve_int_bin_pred_shift_0_right_zx<int_aarch64_sve_asrd>;
- defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_pred, "ASRR_ZPmZ", 1>;
- defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_pred, "LSRR_ZPmZ", 1>;
- defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", AArch64lsl_pred, "LSLR_ZPmZ", 1>;
+ defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_m1, "ASRR_ZPmZ", 1>;
+ defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_m1, "LSRR_ZPmZ", 1>;
+ defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", AArch64lsl_m1, "LSLR_ZPmZ", 1>;
defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", 0>;
defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", 0>;
defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", 0>;
(PTEST_PP PPR:$pg, PPR:$src)>;
// LD1R of 128-bit masked data
- def : Pat<(nxv16i8 (AArch64ld1rq PPR:$gp, GPR64:$base)),
+ def : Pat<(nxv16i8 (AArch64ld1rq_z PPR:$gp, GPR64:$base)),
(LD1RQ_B_IMM $gp, $base, (i64 0))>;
- def : Pat<(nxv8i16 (AArch64ld1rq PPR:$gp, GPR64:$base)),
+ def : Pat<(nxv8i16 (AArch64ld1rq_z PPR:$gp, GPR64:$base)),
(LD1RQ_H_IMM $gp, $base, (i64 0))>;
- def : Pat<(nxv4i32 (AArch64ld1rq PPR:$gp, GPR64:$base)),
+ def : Pat<(nxv4i32 (AArch64ld1rq_z PPR:$gp, GPR64:$base)),
(LD1RQ_W_IMM $gp, $base, (i64 0))>;
- def : Pat<(nxv2i64 (AArch64ld1rq PPR:$gp, GPR64:$base)),
+ def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, GPR64:$base)),
(LD1RQ_D_IMM $gp, $base, (i64 0))>;
- def : Pat<(nxv16i8 (AArch64ld1rq PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
+ def : Pat<(nxv16i8 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
(LD1RQ_B_IMM $gp, $base, simm4s16:$imm)>;
- def : Pat<(nxv8i16 (AArch64ld1rq PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
+ def : Pat<(nxv8i16 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
(LD1RQ_H_IMM $gp, $base, simm4s16:$imm)>;
- def : Pat<(nxv4i32 (AArch64ld1rq PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
+ def : Pat<(nxv4i32 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
(LD1RQ_W_IMM $gp, $base, simm4s16:$imm)>;
- def : Pat<(nxv2i64 (AArch64ld1rq PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
+ def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
(LD1RQ_D_IMM $gp, $base, simm4s16:$imm)>;
def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
}
// 2-element contiguous loads
- defm : ld1<LD1B_D, LD1B_D_IMM, nxv2i64, AArch64ld1, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
- defm : ld1<LD1SB_D, LD1SB_D_IMM, nxv2i64, AArch64ld1s, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
- defm : ld1<LD1H_D, LD1H_D_IMM, nxv2i64, AArch64ld1, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
- defm : ld1<LD1SH_D, LD1SH_D_IMM, nxv2i64, AArch64ld1s, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
- defm : ld1<LD1W_D, LD1W_D_IMM, nxv2i64, AArch64ld1, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
- defm : ld1<LD1SW_D, LD1SW_D_IMM, nxv2i64, AArch64ld1s, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
- defm : ld1<LD1D, LD1D_IMM, nxv2i64, AArch64ld1, nxv2i1, nxv2i64, am_sve_regreg_lsl3>;
- defm : ld1<LD1D, LD1D_IMM, nxv2f64, AArch64ld1, nxv2i1, nxv2f64, am_sve_regreg_lsl3>;
+ defm : ld1<LD1B_D, LD1B_D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1SB_D, LD1SB_D_IMM, nxv2i64, AArch64ld1s_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1H_D, LD1H_D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1SH_D, LD1SH_D_IMM, nxv2i64, AArch64ld1s_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1W_D, LD1W_D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
+ defm : ld1<LD1SW_D, LD1SW_D_IMM, nxv2i64, AArch64ld1s_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
+ defm : ld1<LD1D, LD1D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i64, am_sve_regreg_lsl3>;
+ defm : ld1<LD1D, LD1D_IMM, nxv2f64, AArch64ld1_z, nxv2i1, nxv2f64, am_sve_regreg_lsl3>;
// 4-element contiguous loads
- defm : ld1<LD1B_S, LD1B_S_IMM, nxv4i32, AArch64ld1, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
- defm : ld1<LD1SB_S, LD1SB_S_IMM, nxv4i32, AArch64ld1s, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
- defm : ld1<LD1H_S, LD1H_S_IMM, nxv4i32, AArch64ld1, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
- defm : ld1<LD1SH_S, LD1SH_S_IMM, nxv4i32, AArch64ld1s, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
- defm : ld1<LD1W, LD1W_IMM, nxv4i32, AArch64ld1, nxv4i1, nxv4i32, am_sve_regreg_lsl2>;
- defm : ld1<LD1W, LD1W_IMM, nxv4f32, AArch64ld1, nxv4i1, nxv4f32, am_sve_regreg_lsl2>;
+ defm : ld1<LD1B_S, LD1B_S_IMM, nxv4i32, AArch64ld1_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1SB_S, LD1SB_S_IMM, nxv4i32, AArch64ld1s_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1H_S, LD1H_S_IMM, nxv4i32, AArch64ld1_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1SH_S, LD1SH_S_IMM, nxv4i32, AArch64ld1s_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1W, LD1W_IMM, nxv4i32, AArch64ld1_z, nxv4i1, nxv4i32, am_sve_regreg_lsl2>;
+ defm : ld1<LD1W, LD1W_IMM, nxv4f32, AArch64ld1_z, nxv4i1, nxv4f32, am_sve_regreg_lsl2>;
// 8-element contiguous loads
- defm : ld1<LD1B_H, LD1B_H_IMM, nxv8i16, AArch64ld1, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
- defm : ld1<LD1SB_H, LD1SB_H_IMM, nxv8i16, AArch64ld1s, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
- defm : ld1<LD1H, LD1H_IMM, nxv8i16, AArch64ld1, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
- defm : ld1<LD1H, LD1H_IMM, nxv8f16, AArch64ld1, nxv8i1, nxv8f16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1B_H, LD1B_H_IMM, nxv8i16, AArch64ld1_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1SB_H, LD1SB_H_IMM, nxv8i16, AArch64ld1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1H, LD1H_IMM, nxv8i16, AArch64ld1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1H, LD1H_IMM, nxv8f16, AArch64ld1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>;
let Predicates = [HasBF16, HasSVE] in {
- defm : ld1<LD1H, LD1H_IMM, nxv8bf16, AArch64ld1, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1H, LD1H_IMM, nxv8bf16, AArch64ld1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
}
// 16-element contiguous loads
- defm : ld1<LD1B, LD1B_IMM, nxv16i8, AArch64ld1, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1B, LD1B_IMM, nxv16i8, AArch64ld1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
multiclass ldnf1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT> {
// scalar + immediate (mul vl)
}
// 2-element contiguous non-faulting loads
- defm : ldnf1<LDNF1B_D_IMM, nxv2i64, AArch64ldnf1, nxv2i1, nxv2i8>;
- defm : ldnf1<LDNF1SB_D_IMM, nxv2i64, AArch64ldnf1s, nxv2i1, nxv2i8>;
- defm : ldnf1<LDNF1H_D_IMM, nxv2i64, AArch64ldnf1, nxv2i1, nxv2i16>;
- defm : ldnf1<LDNF1SH_D_IMM, nxv2i64, AArch64ldnf1s, nxv2i1, nxv2i16>;
- defm : ldnf1<LDNF1W_D_IMM, nxv2i64, AArch64ldnf1, nxv2i1, nxv2i32>;
- defm : ldnf1<LDNF1SW_D_IMM, nxv2i64, AArch64ldnf1s, nxv2i1, nxv2i32>;
- defm : ldnf1<LDNF1D_IMM, nxv2i64, AArch64ldnf1, nxv2i1, nxv2i64>;
- defm : ldnf1<LDNF1D_IMM, nxv2f64, AArch64ldnf1, nxv2i1, nxv2f64>;
+ defm : ldnf1<LDNF1B_D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i8>;
+ defm : ldnf1<LDNF1SB_D_IMM, nxv2i64, AArch64ldnf1s_z, nxv2i1, nxv2i8>;
+ defm : ldnf1<LDNF1H_D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i16>;
+ defm : ldnf1<LDNF1SH_D_IMM, nxv2i64, AArch64ldnf1s_z, nxv2i1, nxv2i16>;
+ defm : ldnf1<LDNF1W_D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i32>;
+ defm : ldnf1<LDNF1SW_D_IMM, nxv2i64, AArch64ldnf1s_z, nxv2i1, nxv2i32>;
+ defm : ldnf1<LDNF1D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i64>;
+ defm : ldnf1<LDNF1D_IMM, nxv2f64, AArch64ldnf1_z, nxv2i1, nxv2f64>;
// 4-element contiguous non-faulting loads
- defm : ldnf1<LDNF1B_S_IMM, nxv4i32, AArch64ldnf1, nxv4i1, nxv4i8>;
- defm : ldnf1<LDNF1SB_S_IMM, nxv4i32, AArch64ldnf1s, nxv4i1, nxv4i8>;
- defm : ldnf1<LDNF1H_S_IMM, nxv4i32, AArch64ldnf1, nxv4i1, nxv4i16>;
- defm : ldnf1<LDNF1SH_S_IMM, nxv4i32, AArch64ldnf1s, nxv4i1, nxv4i16>;
- defm : ldnf1<LDNF1W_IMM, nxv4i32, AArch64ldnf1, nxv4i1, nxv4i32>;
- defm : ldnf1<LDNF1W_IMM, nxv4f32, AArch64ldnf1, nxv4i1, nxv4f32>;
+ defm : ldnf1<LDNF1B_S_IMM, nxv4i32, AArch64ldnf1_z, nxv4i1, nxv4i8>;
+ defm : ldnf1<LDNF1SB_S_IMM, nxv4i32, AArch64ldnf1s_z, nxv4i1, nxv4i8>;
+ defm : ldnf1<LDNF1H_S_IMM, nxv4i32, AArch64ldnf1_z, nxv4i1, nxv4i16>;
+ defm : ldnf1<LDNF1SH_S_IMM, nxv4i32, AArch64ldnf1s_z, nxv4i1, nxv4i16>;
+ defm : ldnf1<LDNF1W_IMM, nxv4i32, AArch64ldnf1_z, nxv4i1, nxv4i32>;
+ defm : ldnf1<LDNF1W_IMM, nxv4f32, AArch64ldnf1_z, nxv4i1, nxv4f32>;
// 8-element contiguous non-faulting loads
- defm : ldnf1<LDNF1B_H_IMM, nxv8i16, AArch64ldnf1, nxv8i1, nxv8i8>;
- defm : ldnf1<LDNF1SB_H_IMM, nxv8i16, AArch64ldnf1s, nxv8i1, nxv8i8>;
- defm : ldnf1<LDNF1H_IMM, nxv8i16, AArch64ldnf1, nxv8i1, nxv8i16>;
- defm : ldnf1<LDNF1H_IMM, nxv8f16, AArch64ldnf1, nxv8i1, nxv8f16>;
+ defm : ldnf1<LDNF1B_H_IMM, nxv8i16, AArch64ldnf1_z, nxv8i1, nxv8i8>;
+ defm : ldnf1<LDNF1SB_H_IMM, nxv8i16, AArch64ldnf1s_z, nxv8i1, nxv8i8>;
+ defm : ldnf1<LDNF1H_IMM, nxv8i16, AArch64ldnf1_z, nxv8i1, nxv8i16>;
+ defm : ldnf1<LDNF1H_IMM, nxv8f16, AArch64ldnf1_z, nxv8i1, nxv8f16>;
let Predicates = [HasBF16, HasSVE] in {
- defm : ldnf1<LDNF1H_IMM, nxv8bf16, AArch64ldnf1, nxv8i1, nxv8bf16>;
+ defm : ldnf1<LDNF1H_IMM, nxv8bf16, AArch64ldnf1_z, nxv8i1, nxv8bf16>;
}
// 16-element contiguous non-faulting loads
- defm : ldnf1<LDNF1B_IMM, nxv16i8, AArch64ldnf1, nxv16i1, nxv16i8>;
+ defm : ldnf1<LDNF1B_IMM, nxv16i8, AArch64ldnf1_z, nxv16i1, nxv16i8>;
multiclass ldff1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
// reg + reg
}
// 2-element contiguous first faulting loads
- defm : ldff1<LDFF1B_D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
- defm : ldff1<LDFF1SB_D, nxv2i64, AArch64ldff1s, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
- defm : ldff1<LDFF1H_D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
- defm : ldff1<LDFF1SH_D, nxv2i64, AArch64ldff1s, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
- defm : ldff1<LDFF1W_D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
- defm : ldff1<LDFF1SW_D, nxv2i64, AArch64ldff1s, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
- defm : ldff1<LDFF1D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i64, am_sve_regreg_lsl3>;
- defm : ldff1<LDFF1W_D, nxv2f32, AArch64ldff1, nxv2i1, nxv2f32, am_sve_regreg_lsl2>;
- defm : ldff1<LDFF1D, nxv2f64, AArch64ldff1, nxv2i1, nxv2f64, am_sve_regreg_lsl3>;
+ defm : ldff1<LDFF1B_D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1SB_D, nxv2i64, AArch64ldff1s_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1H_D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1SH_D, nxv2i64, AArch64ldff1s_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1W_D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
+ defm : ldff1<LDFF1SW_D, nxv2i64, AArch64ldff1s_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
+ defm : ldff1<LDFF1D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i64, am_sve_regreg_lsl3>;
+ defm : ldff1<LDFF1W_D, nxv2f32, AArch64ldff1_z, nxv2i1, nxv2f32, am_sve_regreg_lsl2>;
+ defm : ldff1<LDFF1D, nxv2f64, AArch64ldff1_z, nxv2i1, nxv2f64, am_sve_regreg_lsl3>;
// 4-element contiguous first faulting loads
- defm : ldff1<LDFF1B_S, nxv4i32, AArch64ldff1, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
- defm : ldff1<LDFF1SB_S, nxv4i32, AArch64ldff1s, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
- defm : ldff1<LDFF1H_S, nxv4i32, AArch64ldff1, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
- defm : ldff1<LDFF1SH_S, nxv4i32, AArch64ldff1s, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
- defm : ldff1<LDFF1W, nxv4i32, AArch64ldff1, nxv4i1, nxv4i32, am_sve_regreg_lsl2>;
- defm : ldff1<LDFF1W, nxv4f32, AArch64ldff1, nxv4i1, nxv4f32, am_sve_regreg_lsl2>;
+ defm : ldff1<LDFF1B_S, nxv4i32, AArch64ldff1_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1SB_S, nxv4i32, AArch64ldff1s_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1H_S, nxv4i32, AArch64ldff1_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1SH_S, nxv4i32, AArch64ldff1s_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1W, nxv4i32, AArch64ldff1_z, nxv4i1, nxv4i32, am_sve_regreg_lsl2>;
+ defm : ldff1<LDFF1W, nxv4f32, AArch64ldff1_z, nxv4i1, nxv4f32, am_sve_regreg_lsl2>;
// 8-element contiguous first faulting loads
- defm : ldff1<LDFF1B_H, nxv8i16, AArch64ldff1, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
- defm : ldff1<LDFF1SB_H, nxv8i16, AArch64ldff1s, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
- defm : ldff1<LDFF1H, nxv8i16, AArch64ldff1, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
- defm : ldff1<LDFF1H, nxv8f16, AArch64ldff1, nxv8i1, nxv8f16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1B_H, nxv8i16, AArch64ldff1_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1SB_H, nxv8i16, AArch64ldff1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1H, nxv8i16, AArch64ldff1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1H, nxv8f16, AArch64ldff1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>;
let Predicates = [HasBF16, HasSVE] in {
- defm : ldff1<LDFF1H, nxv8bf16, AArch64ldff1, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1H, nxv8bf16, AArch64ldff1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
}
// 16-element contiguous first faulting loads
- defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
multiclass st1<Instruction RegRegInst, Instruction RegImmInst, ValueType Ty,
SDPatternOperator Store, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
let Predicates = [HasSVE, HasMatMulFP64] in {
defm FMMLA_ZZZ_D : sve_fp_matrix_mla<1, "fmmla", ZPR64, int_aarch64_sve_fmmla, nxv2f64>;
- defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro>;
- defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro>;
- defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro>;
- defm LD1RO_D_IMM : sve_mem_ldor_si<0b11, "ld1rod", Z_d, ZPR64, nxv2i64, nxv2i1, AArch64ld1ro>;
- defm LD1RO_B : sve_mem_ldor_ss<0b00, "ld1rob", Z_b, ZPR8, GPR64NoXZRshifted8, nxv16i8, nxv16i1, AArch64ld1ro, am_sve_regreg_lsl0>;
- defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16, nxv8i16, nxv8i1, AArch64ld1ro, am_sve_regreg_lsl1>;
- defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32, nxv4i32, nxv4i1, AArch64ld1ro, am_sve_regreg_lsl2>;
- defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro, am_sve_regreg_lsl3>;
+ defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro_z>;
+ defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro_z>;
+ defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro_z>;
+ defm LD1RO_D_IMM : sve_mem_ldor_si<0b11, "ld1rod", Z_d, ZPR64, nxv2i64, nxv2i1, AArch64ld1ro_z>;
+ defm LD1RO_B : sve_mem_ldor_ss<0b00, "ld1rob", Z_b, ZPR8, GPR64NoXZRshifted8, nxv16i8, nxv16i1, AArch64ld1ro_z, am_sve_regreg_lsl0>;
+ defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16, nxv8i16, nxv8i1, AArch64ld1ro_z, am_sve_regreg_lsl1>;
+ defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32, nxv4i32, nxv4i1, AArch64ld1ro_z, am_sve_regreg_lsl2>;
+ defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro_z, am_sve_regreg_lsl3>;
defm ZIP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 0, "zip1", int_aarch64_sve_zip1q>;
defm ZIP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2", int_aarch64_sve_zip2q>;
defm UZP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1", int_aarch64_sve_uzp1q>;
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
// SVE2 non-temporal gather loads
- defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00000, "ldnt1sb", AArch64ldnt1s_gather, nxv4i8>;
- defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00001, "ldnt1b", AArch64ldnt1_gather, nxv4i8>;
- defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00100, "ldnt1sh", AArch64ldnt1s_gather, nxv4i16>;
- defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00101, "ldnt1h", AArch64ldnt1_gather, nxv4i16>;
- defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b01001, "ldnt1w", AArch64ldnt1_gather, nxv4i32>;
-
- defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10000, "ldnt1sb", AArch64ldnt1s_gather, nxv2i8>;
- defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10010, "ldnt1b", AArch64ldnt1_gather, nxv2i8>;
- defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10100, "ldnt1sh", AArch64ldnt1s_gather, nxv2i16>;
- defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10110, "ldnt1h", AArch64ldnt1_gather, nxv2i16>;
- defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11000, "ldnt1sw", AArch64ldnt1s_gather, nxv2i32>;
- defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11010, "ldnt1w", AArch64ldnt1_gather, nxv2i32>;
- defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather, nxv2i64>;
+ defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv4i8>;
+ defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00001, "ldnt1b", AArch64ldnt1_gather_z, nxv4i8>;
+ defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00100, "ldnt1sh", AArch64ldnt1s_gather_z, nxv4i16>;
+ defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00101, "ldnt1h", AArch64ldnt1_gather_z, nxv4i16>;
+ defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b01001, "ldnt1w", AArch64ldnt1_gather_z, nxv4i32>;
+
+ defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv2i8>;
+ defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10010, "ldnt1b", AArch64ldnt1_gather_z, nxv2i8>;
+ defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10100, "ldnt1sh", AArch64ldnt1s_gather_z, nxv2i16>;
+ defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10110, "ldnt1h", AArch64ldnt1_gather_z, nxv2i16>;
+ defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11000, "ldnt1sw", AArch64ldnt1s_gather_z, nxv2i32>;
+ defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11010, "ldnt1w", AArch64ldnt1_gather_z, nxv2i32>;
+ defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather_z, nxv2i64>;
// SVE2 vector splice (constructive)
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;