``XTheadMac``
LLVM implements `the XTheadMac (multiply-accumulate instructions) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification.
+``XTHeadMemPair``
+ LLVM implements `the XTHeadMemPair (two-GPR memory operations) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification.
+
``XTHeadVdot``
LLVM implements `version 1.0.0 of the THeadV-family custom instructions specification <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.0/xthead-2022-12-04-2.2.0.pdf>`_ by T-HEAD of Alibaba. All instructions are prefixed with `th.` as described in the specification, and the riscv-toolchain-convention document linked above.
* Adds support for the vendor-defined XTHeadBb (basic bit-manipulation) extension.
* Adds support for the vendor-defined XTHeadBs (single-bit) extension.
* Adds support for the vendor-defined XTHeadMac (multiply-accumulate instructions) extension.
+* Adds assembler/disassembler support for the vendor-defined XTHeadMemPair
+ (two-GPR memory operations) extension.
Changes to the WebAssembly Backend
----------------------------------
{"xtheadbb", RISCVExtensionVersion{1, 0}},
{"xtheadbs", RISCVExtensionVersion{1, 0}},
{"xtheadmac", RISCVExtensionVersion{1, 0}},
+ {"xtheadmempair", RISCVExtensionVersion{1, 0}},
{"xtheadvdot", RISCVExtensionVersion{1, 0}},
{"xventanacondops", RISCVExtensionVersion{1, 0}},
};
}
}
+ unsigned Opcode = Inst.getOpcode();
+
+ if (Opcode == RISCV::TH_LDD || Opcode == RISCV::TH_LWUD ||
+ Opcode == RISCV::TH_LWD || Opcode == RISCV::TH_SDD ||
+ Opcode == RISCV::TH_SWD) {
+ unsigned Rd1 = Inst.getOperand(0).getReg();
+ unsigned Rd2 = Inst.getOperand(1).getReg();
+ unsigned Rs1 = Inst.getOperand(2).getReg();
+ // The encoding with rd1 == rd2 == rs1 is reserved.
+ if (Rs1 == Rd1 && Rs1 == Rd2) {
+ SMLoc Loc = Operands[1]->getStartLoc();
+ return Error(Loc, "The source register and destination registers "
+ "cannot be equal.");
+ }
+
+ bool IsWordOp = (Opcode == RISCV::TH_LWD || Opcode == RISCV::TH_LWUD ||
+ Opcode == RISCV::TH_SWD);
+ // The last operand must be constant 3 or 4 depending on the data width.
+ if (IsWordOp && Inst.getOperand(4).getImm() != 3) {
+ SMLoc Loc = Operands.back()->getStartLoc();
+ return Error(Loc, "Operand must be constant 3.");
+ } else if (!IsWordOp && Inst.getOperand(4).getImm() != 4) {
+ SMLoc Loc = Operands.back()->getStartLoc();
+ return Error(Loc, "Operand must be constant 4.");
+ }
+ }
+
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
RISCVII::VConstraintType Constraints = RISCVII::getConstraint(MCID.TSFlags);
if (Constraints == RISCVII::NoConstraint)
if ((Constraints & RISCVII::VMConstraint) && (DestReg == RISCV::V0)) {
// vadc, vsbc are special cases. These instructions have no mask register.
// The destination register could not be V0.
- unsigned Opcode = Inst.getOpcode();
if (Opcode == RISCV::VADC_VVM || Opcode == RISCV::VADC_VXM ||
Opcode == RISCV::VADC_VIM || Opcode == RISCV::VSBC_VVM ||
Opcode == RISCV::VSBC_VXM || Opcode == RISCV::VFMERGE_VFM ||
uint64_t Address,
const MCDisassembler *Decoder);
+static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+
#include "RISCVGenDisassemblerTables.inc"
static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn,
return MCDisassembler::Success;
}
+// Custom decoder for the XTHeadMemPair load/store pair instructions
+// (th.lwd, th.lwud, th.ldd, th.swd, th.sdd).
+//
+// Operands are appended to Inst in the order rd1, rd2, rs1, uimm2, followed
+// by the implicit constant operand (3 for the word-sized opcodes, 4 for the
+// doubleword-sized ones).
+//
+// Returns MCDisassembler::Fail as soon as a register field is rejected by
+// DecodeGPRRegisterClass, so that an MCInst with missing operands is never
+// reported as a successful decode; otherwise returns MCDisassembler::Success.
+static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
+                                        uint64_t Address,
+                                        const MCDisassembler *Decoder) {
+  uint32_t Rd1 = fieldFromInstruction(Insn, 7, 5);
+  uint32_t Rs1 = fieldFromInstruction(Insn, 15, 5);
+  uint32_t Rd2 = fieldFromInstruction(Insn, 20, 5);
+  uint32_t UImm2 = fieldFromInstruction(Insn, 25, 2);
+
+  // The operand order (rd1, rd2, rs1) differs from the field order in the
+  // instruction word. Propagate a register decode failure instead of
+  // silently continuing with an incomplete operand list.
+  if (DecodeGPRRegisterClass(Inst, Rd1, Address, Decoder) ==
+      MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+  if (DecodeGPRRegisterClass(Inst, Rd2, Address, Decoder) ==
+      MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+  if (DecodeGPRRegisterClass(Inst, Rs1, Address, Decoder) ==
+      MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+
+  // UImm2 is extracted from a 2-bit field, so it always fits.
+  DecodeStatus Result = decodeUImmOperand<2>(Inst, UImm2, Address, Decoder);
+  (void)Result;
+  assert(Result == MCDisassembler::Success && "Invalid immediate");
+
+  // Disassemble the final operand which is implicit.
+  unsigned Opcode = Inst.getOpcode();
+  bool IsWordOp = (Opcode == RISCV::TH_LWD || Opcode == RISCV::TH_LWUD ||
+                   Opcode == RISCV::TH_SWD);
+  Inst.addOperand(MCOperand::createImm(IsWordOp ? 3 : 4));
+  return MCDisassembler::Success;
+}
+
DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
if (Result != MCDisassembler::Fail)
return Result;
}
+ if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadMemPair]) {
+ LLVM_DEBUG(dbgs() << "Trying XTHeadMemPair custom opcode table:\n");
+ Result = decodeInstruction(DecoderTableTHeadMemPair32, MI, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail)
+ return Result;
+ }
if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadVdot]) {
LLVM_DEBUG(dbgs() << "Trying XTHeadVdot custom opcode table:\n");
Result =
AssemblerPredicate<(all_of FeatureVendorXTHeadMac),
"'xtheadmac' (T-Head Multiply-Accumulate Instructions)">;
+def FeatureVendorXTHeadMemPair
+ : SubtargetFeature<"xtheadmempair", "HasVendorXTHeadMemPair", "true",
+ "'xtheadmempair' (T-Head two-GPR Memory Operations)">;
+def HasVendorXTHeadMemPair : Predicate<"Subtarget->hasVendorXTHeadMemPair()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadMemPair),
+ "'xtheadmempair' (T-Head two-GPR Memory Operations)">;
+
def FeatureVendorXTHeadVdot
: SubtargetFeature<"xtheadvdot", "HasVendorXTHeadVdot", "true",
"'xtheadvdot' (T-Head Vector Extensions for Dot)",
setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR});
+ if (Subtarget.hasVendorXTHeadMemPair())
+ setTargetDAGCombine({ISD::LOAD, ISD::STORE});
if (Subtarget.useRVVForFixedLengthVectors())
setTargetDAGCombine(ISD::BITCAST);
return InputRootReplacement;
}
+// Helper for performMemPairCombine: fuse the two memory accesses LSNode1 and
+// LSNode2 into one XTHeadMemPair node addressed by BasePtr and Imm.
+// Returns an empty SDValue when hasPredecessorHelper reports a dependency
+// involving either access; otherwise rewires every use of LSNode2 to the new
+// node and returns the replacement for LSNode1.
+static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
+                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
+                                 uint64_t Imm) {
+  SmallPtrSet<const SDNode *, 32> Seen;
+  SmallVector<const SDNode *, 8> Pending = {LSNode1, LSNode2};
+
+  // Give up if either access turns out to be a predecessor of the pair.
+  if (SDNode::hasPredecessorHelper(LSNode1, Seen, Pending))
+    return SDValue();
+  if (SDNode::hasPredecessorHelper(LSNode2, Seen, Pending))
+    return SDValue();
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MVT XLenVT = MF.getSubtarget<RISCVSubtarget>().getXLenVT();
+
+  // The fused access covers twice the memory of a single one, starting at
+  // the location described by LSNode1's memory operand.
+  const bool IsWord = LSNode1->getMemoryVT() == MVT::i32;
+  EVT PairMemVT = IsWord ? MVT::i64 : MVT::i128;
+  MachineMemOperand *OldMMO = LSNode1->getMemOperand();
+  MachineMemOperand *PairMMO = MF.getMachineMemOperand(
+      OldMMO, OldMMO->getPointerInfo(), IsWord ? 8 : 16);
+
+  SDLoc DL1(LSNode1);
+  SDValue ImmOp = DAG.getConstant(Imm, DL1, XLenVT);
+
+  // Stores: a single chain result that stands in for both original stores.
+  if (LSNode1->getOpcode() != ISD::LOAD) {
+    unsigned PairOpc = IsWord ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
+
+    SDValue Pair = DAG.getMemIntrinsicNode(
+        PairOpc, DL1, DAG.getVTList(MVT::Other),
+        {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
+         BasePtr, ImmOp},
+        PairMemVT, PairMMO);
+
+    DAG.ReplaceAllUsesWith(LSNode2, Pair.getNode());
+    return Pair;
+  }
+
+  // Loads: pick the opcode from the access width and extension kind.
+  unsigned PairOpc = RISCVISD::TH_LDD;
+  if (IsWord) {
+    const bool IsZExt =
+        cast<LoadSDNode>(LSNode1)->getExtensionType() == ISD::ZEXTLOAD;
+    PairOpc = IsZExt ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
+  }
+
+  SDValue Pair = DAG.getMemIntrinsicNode(
+      PairOpc, DL1, DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
+      {LSNode1->getChain(), BasePtr, ImmOp}, PairMemVT, PairMMO);
+
+  // Results 0 and 1 are the two loaded values; result 2 is the chain. Each
+  // original load is replaced by a (value, chain) pair.
+  SDValue First = DAG.getMergeValues({Pair.getValue(0), Pair.getValue(2)}, DL1);
+  SDValue Second = DAG.getMergeValues({Pair.getValue(1), Pair.getValue(2)},
+                                      SDLoc(LSNode2));
+
+  DAG.ReplaceAllUsesWith(LSNode2, Second.getNode());
+  return First;
+}
+
+// Try to combine two adjacent loads/stores to a single pair instruction from
+// the XTHeadMemPair vendor extension.
+//
+// N is the load or store being visited. Other users of N's chain node are
+// scanned for a second access of the same kind, type and extension that
+// addresses the element directly after N's. Returns the replacement value
+// for N produced by tryMemPairCombine, or an empty SDValue if no suitable
+// partner exists.
+static SDValue performMemPairCombine(SDNode *N,
+                                     TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  MachineFunction &MF = DAG.getMachineFunction();
+  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+
+  // Target does not support load/store pair.
+  if (!Subtarget.hasVendorXTHeadMemPair())
+    return SDValue();
+
+  LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
+  EVT MemVT = LSNode1->getMemoryVT();
+  // The pointer is operand 1 of a load and operand 2 of a store.
+  unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
+
+  // No volatile, indexed or atomic loads/stores.
+  if (!LSNode1->isSimple() || LSNode1->isIndexed())
+    return SDValue();
+
+  // Pair nodes exist only for 32-bit and 64-bit accesses. Any other memory
+  // type (e.g. i8 or i16) must not fall through to the i64 handling below.
+  if (MemVT != MVT::i32 && MemVT != MVT::i64)
+    return SDValue();
+
+  // Function to get a base + constant representation from a memory value.
+  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
+    if (Ptr->getOpcode() == ISD::ADD)
+      if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
+        return {Ptr->getOperand(0), C1->getZExtValue()};
+    return {Ptr, 0};
+  };
+
+  auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
+
+  SDValue Chain = N->getOperand(0);
+  for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
+       UI != UE; ++UI) {
+    SDUse &Use = UI.getUse();
+    if (Use.getUser() == N || Use.getResNo() != 0 ||
+        Use.getUser()->getOpcode() != N->getOpcode())
+      continue;
+
+    LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
+
+    // The use found above may be a non-chain operand (e.g. the stored
+    // value). The merged node is built on LSNode1's chain only, so both
+    // accesses must hang off exactly the same chain value.
+    if (LSNode2->getChain() != Chain)
+      continue;
+
+    // No volatile, indexed or atomic loads/stores.
+    if (!LSNode2->isSimple() || LSNode2->isIndexed())
+      continue;
+
+    // Check if LSNode1 and LSNode2 have the same type and extension.
+    if (LSNode1->getOpcode() == ISD::LOAD)
+      if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
+          cast<LoadSDNode>(LSNode1)->getExtensionType())
+        continue;
+
+    if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
+      continue;
+
+    auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
+
+    // Check if the base pointer is the same for both instruction.
+    if (Base1 != Base2)
+      continue;
+
+    // Check if the offsets match the XTHeadMemPair encoding constraints.
+    if (MemVT == MVT::i32) {
+      // Check for adjacent i32 values and a 2-bit index.
+      if ((Offset1 + 4 != Offset2) || !isShiftedUInt<2, 3>(Offset1))
+        continue;
+    } else {
+      // MemVT is i64 here (guarded above).
+      // Check for adjacent i64 values and a 2-bit index.
+      if ((Offset1 + 8 != Offset2) || !isShiftedUInt<2, 4>(Offset1))
+        continue;
+    }
+
+    // Try to combine.
+    if (SDValue Res = tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
+      return Res;
+  }
+
+  return SDValue();
+}
+
// Fold
// (fp_to_int (froundeven X)) -> fcvt X, rne
// (fp_to_int (ftrunc X)) -> fcvt X, rtz
return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
VL);
}
+ case ISD::LOAD:
case ISD::STORE: {
+ if (DCI.isAfterLegalizeDAG())
+ if (SDValue V = performMemPairCombine(N, DCI))
+ return V;
+
+ if (N->getOpcode() != ISD::STORE)
+ break;
+
auto *Store = cast<StoreSDNode>(N);
SDValue Val = Store->getValue();
// Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
NODE_NAME_CASE(ORC_B)
NODE_NAME_CASE(ZIP)
NODE_NAME_CASE(UNZIP)
+ NODE_NAME_CASE(TH_LWD)
+ NODE_NAME_CASE(TH_LWUD)
+ NODE_NAME_CASE(TH_LDD)
+ NODE_NAME_CASE(TH_SWD)
+ NODE_NAME_CASE(TH_SDD)
NODE_NAME_CASE(VMV_V_X_VL)
NODE_NAME_CASE(VFMV_V_F_VL)
NODE_NAME_CASE(VMV_X_S)
// Load address.
LA = ISD::FIRST_TARGET_MEMORY_OPCODE,
LA_TLS_IE,
+
+ TH_LWD,
+ TH_LWUD,
+ TH_LDD,
+ TH_SWD,
+ TH_SDD,
};
} // namespace RISCVISD
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// T-HEAD C specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_TDBLD : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;
+def SDT_TDBST : SDTypeProfile<0, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;
+
+def TH_TLWUD : SDNode<"RISCVISD::TH_LWUD", SDT_TDBLD,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def TH_TLWD : SDNode<"RISCVISD::TH_LWD", SDT_TDBLD,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def TH_TLDD : SDNode<"RISCVISD::TH_LDD", SDT_TDBLD,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def TH_TSWD : SDNode<"RISCVISD::TH_SWD", SDT_TDBST,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def TH_TSDD : SDNode<"RISCVISD::TH_SDD", SDT_TDBST,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
class THInstVdotVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
let Constraints = "$rd_wb = $rd";
}
+// Instruction class for the XTHeadMemPair pair loads: two GPR results
+// ($rd, $rs2) loaded via base register $rs1 and the 2-bit index $uimm2.
+// Built on RVInstR with OPC_CUSTOM_0 and 0b100; `!shl(funct5, 2)` leaves the
+// two low bits of the first RVInstR argument zero, and Inst{26-25} is then
+// set from $uimm2. $const3or4 is part of the operand list and assembly
+// string but is not assigned to any encoding bits in this class.
+let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
+ hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+class THLoadPair<bits<5> funct5, string opcodestr>
+ : RVInstR<!shl(funct5, 2), 0b100, OPC_CUSTOM_0,
+ (outs GPR:$rd, GPR:$rs2), (ins GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
+ opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
+ bits<2> uimm2;
+ let Inst{26-25} = uimm2;
+ let DecoderMethod = "decodeXTHeadMemPair";
+ // Both results are earlyclobber - presumably so that neither destination is
+ // allocated to the same register as $rs1 (TODO confirm against the spec).
+ let Constraints = "@earlyclobber $rd,@earlyclobber $rs2";
+}
+
+// Instruction class for the XTHeadMemPair pair stores: no results; the two
+// GPR inputs $rd and $rs2 are stored via base register $rs1 and the 2-bit
+// index $uimm2. Built on RVInstR with OPC_CUSTOM_0 and 0b101;
+// `!shl(funct5, 2)` leaves the two low bits of the first RVInstR argument
+// zero, and Inst{26-25} is then set from $uimm2. $const3or4 is part of the
+// operand list and assembly string but is not assigned to any encoding bits
+// in this class.
+let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+class THStorePair<bits<5> funct5, string opcodestr>
+ : RVInstR<!shl(funct5, 2), 0b101, OPC_CUSTOM_0,
+ (outs), (ins GPR:$rd, GPR:$rs2, GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
+ opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
+ bits<2> uimm2;
+ let Inst{26-25} = uimm2;
+ let DecoderMethod = "decodeXTHeadMemPair";
+}
+
//===----------------------------------------------------------------------===//
// Combination of instruction classes.
def TH_MULSW : THMulAccumulate_rr<0b0010011, "th.mulsw">;
} // Predicates = [HasVendorXTHeadMac, IsRV64]
+let Predicates = [HasVendorXTHeadMemPair] in {
+def TH_LWUD : THLoadPair<0b11110, "th.lwud">,
+ Sched<[WriteLDW, WriteLDW, ReadMemBase]>;
+def TH_SWD : THStorePair<0b11100, "th.swd">,
+ Sched<[WriteSTW, WriteSTW, ReadStoreData, ReadMemBase]>;
+let IsSignExtendingOpW = 1 in
+def TH_LWD : THLoadPair<0b11100, "th.lwd">,
+ Sched<[WriteLDW, WriteLDW, ReadMemBase]>;
+}
+
+let Predicates = [HasVendorXTHeadMemPair, IsRV64] in {
+def TH_LDD : THLoadPair<0b11111, "th.ldd">,
+ Sched<[WriteLDD, WriteLDD, ReadMemBase]>;
+def TH_SDD : THStorePair<0b11111, "th.sdd">,
+ Sched<[WriteSTD, WriteSTD, ReadStoreData, ReadMemBase]>;
+}
+
let Predicates = [HasVendorXTHeadVdot],
Constraints = "@earlyclobber $vd",
RVVConstraint = WidenV in {
defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqasu","PseudoTHVdotVMAQASU",AllQuadWidenableInt8NoVLMulVectors>;
defm : VPatTernaryVMAQA_VX<"int_riscv_th_vmaqaus", "PseudoTHVdotVMAQAUS",AllQuadWidenableInt8NoVLMulVectors>;
}
+
+def uimm2_3_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 0x3,
+ SDLoc(N), Subtarget->getXLenVT());
+}]>;
+
+def uimm2_3 : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+ return isShiftedUInt<2, 3>(Imm);
+}], uimm2_3_XFORM>;
+
+def uimm2_4_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 4) & 0x3,
+ SDLoc(N), Subtarget->getXLenVT());
+}]>;
+
+def uimm2_4 : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+ return isShiftedUInt<2, 4>(Imm);
+}], uimm2_4_XFORM>;
+
+let Predicates = [HasVendorXTHeadMemPair, IsRV64] in {
+def : Pat<(TH_TLWUD i64:$rs1, uimm2_3:$uimm2_3), (TH_LWUD i64:$rs1, uimm2_3:$uimm2_3, 3)>;
+def : Pat<(TH_TLDD i64:$rs1, uimm2_4:$uimm2_4), (TH_LDD i64:$rs1, uimm2_4:$uimm2_4, 4)>;
+
+def : Pat<(TH_TSDD i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4),
+ (TH_SDD i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4, 4)>;
+}
+
+let Predicates = [HasVendorXTHeadMemPair] in {
+ def : Pat<(TH_TLWD GPR:$rs1, uimm2_3:$uimm2_3), (TH_LWD GPR:$rs1, uimm2_3:$uimm2_3, 3)>;
+ def : Pat<(TH_TSWD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_3:$uimm2_3),
+ (TH_SWD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_3:$uimm2_3, 3)>;
+}
; RUN: llc -mtriple=riscv32 -mattr=+svpbmt %s -o - | FileCheck --check-prefixes=CHECK,RV32SVPBMT %s
; RUN: llc -mtriple=riscv32 -mattr=+svinval %s -o - | FileCheck --check-prefixes=CHECK,RV32SVINVAL %s
; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac %s -o - | FileCheck --check-prefixes=CHECK,RV32XTHEADMAC %s
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmempair %s -o - | FileCheck --check-prefix=RV32XTHEADMEMPAIR %s
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zca %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCA %s
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcb %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCB %s
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcd %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCD %s
; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBB %s
; RUN: llc -mtriple=riscv64 -mattr=+xtheadbs %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBS %s
; RUN: llc -mtriple=riscv64 -mattr=+xtheadmac %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADMAC %s
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmempair %s -o - | FileCheck --check-prefix=RV64XTHEADMEMPAIR %s
; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADVDOT %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-zawrs %s -o - | FileCheck --check-prefixes=CHECK,RV64ZAWRS %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-ztso %s -o - | FileCheck --check-prefixes=CHECK,RV64ZTSO %s
; RV32SVPBMT: .attribute 5, "rv32i2p0_svpbmt1p0"
; RV32SVINVAL: .attribute 5, "rv32i2p0_svinval1p0"
; RV32XTHEADMAC: .attribute 5, "rv32i2p0_xtheadmac1p0"
+; RV32XTHEADMEMPAIR: .attribute 5, "rv32i2p0_xtheadmempair1p0"
; RV32ZCA: .attribute 5, "rv32i2p0_zca1p0"
; RV32ZCB: .attribute 5, "rv32i2p0_zca1p0_zcb1p0"
; RV32ZCD: .attribute 5, "rv32i2p0_zcd1p0"
; RV64XTHEADBB: .attribute 5, "rv64i2p0_xtheadbb1p0"
; RV64XTHEADBS: .attribute 5, "rv64i2p0_xtheadbs1p0"
; RV64XTHEADMAC: .attribute 5, "rv64i2p0_xtheadmac1p0"
+; RV64XTHEADMEMPAIR: .attribute 5, "rv64i2p0_xtheadmempair1p0"
; RV64XTHEADVDOT: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xtheadvdot1p0"
; RV64ZTSO: .attribute 5, "rv64i2p0_ztso0p1"
; RV64ZCA: .attribute 5, "rv64i2p0_zca1p0"
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmempair -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32XTHEADMEMPAIR
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmempair -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64XTHEADMEMPAIR
+
+define i64 @lwd(i32* %a) {
+; RV32XTHEADMEMPAIR-LABEL: lwd:
+; RV32XTHEADMEMPAIR: # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 2, 3
+; RV32XTHEADMEMPAIR-NEXT: srai a3, a1, 31
+; RV32XTHEADMEMPAIR-NEXT: srai a4, a2, 31
+; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a2
+; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT: add a3, a3, a4
+; RV32XTHEADMEMPAIR-NEXT: add a1, a3, a1
+; RV32XTHEADMEMPAIR-NEXT: ret
+;
+; RV64XTHEADMEMPAIR-LABEL: lwd:
+; RV64XTHEADMEMPAIR: # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 2, 3
+; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT: ret
+ %1 = getelementptr i32, i32* %a, i64 4
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr i32, i32* %a, i64 5
+ %4 = load i32, i32* %3, align 4
+ %5 = sext i32 %2 to i64
+ %6 = sext i32 %4 to i64
+ %7 = add i64 %5, %6
+ ret i64 %7
+}
+
+define i64 @lwud(i32* %a) {
+; RV32XTHEADMEMPAIR-LABEL: lwud:
+; RV32XTHEADMEMPAIR: # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 2, 3
+; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a2
+; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT: ret
+;
+; RV64XTHEADMEMPAIR-LABEL: lwud:
+; RV64XTHEADMEMPAIR: # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT: th.lwud a1, a2, (a0), 2, 3
+; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT: ret
+ %1 = getelementptr i32, i32* %a, i64 4
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr i32, i32* %a, i64 5
+ %4 = load i32, i32* %3, align 4
+ %5 = zext i32 %2 to i64
+ %6 = zext i32 %4 to i64
+ %7 = add i64 %5, %6
+ ret i64 %7
+}
+
+define i64 @ldd(i64* %a) {
+; RV32XTHEADMEMPAIR-LABEL: ldd:
+; RV32XTHEADMEMPAIR: # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT: lw a1, 32(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a2, 36(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a3, 44(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a0, 40(a0)
+; RV32XTHEADMEMPAIR-NEXT: add a2, a2, a3
+; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a0
+; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT: add a1, a2, a1
+; RV32XTHEADMEMPAIR-NEXT: ret
+;
+; RV64XTHEADMEMPAIR-LABEL: ldd:
+; RV64XTHEADMEMPAIR: # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT: th.ldd a1, a2, (a0), 2, 4
+; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT: ret
+ %1 = getelementptr i64, i64* %a, i64 4
+ %2 = load i64, i64* %1, align 8
+ %3 = getelementptr i64, i64* %a, i64 5
+ %4 = load i64, i64* %3, align 8
+ %5 = add i64 %2, %4
+ ret i64 %5
+}
+
+define i64 @lwd_0(i32* %a) {
+; RV32XTHEADMEMPAIR-LABEL: lwd_0:
+; RV32XTHEADMEMPAIR: # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT: srai a3, a1, 31
+; RV32XTHEADMEMPAIR-NEXT: srai a4, a2, 31
+; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a2
+; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT: add a3, a3, a4
+; RV32XTHEADMEMPAIR-NEXT: add a1, a3, a1
+; RV32XTHEADMEMPAIR-NEXT: ret
+;
+; RV64XTHEADMEMPAIR-LABEL: lwd_0:
+; RV64XTHEADMEMPAIR: # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 0, 3
+; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT: ret
+ %1 = getelementptr i32, i32* %a, i64 0
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr i32, i32* %a, i64 1
+ %4 = load i32, i32* %3, align 4
+ %5 = sext i32 %2 to i64
+ %6 = sext i32 %4 to i64
+ %7 = add i64 %5, %6
+ ret i64 %7
+}
+
+define i64 @lwud_0(i32* %a) {
+; RV32XTHEADMEMPAIR-LABEL: lwud_0:
+; RV32XTHEADMEMPAIR: # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a2
+; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT: ret
+;
+; RV64XTHEADMEMPAIR-LABEL: lwud_0:
+; RV64XTHEADMEMPAIR: # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT: th.lwud a1, a2, (a0), 0, 3
+; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT: ret
+ %1 = getelementptr i32, i32* %a, i64 0
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr i32, i32* %a, i64 1
+ %4 = load i32, i32* %3, align 4
+ %5 = zext i32 %2 to i64
+ %6 = zext i32 %4 to i64
+ %7 = add i64 %5, %6
+ ret i64 %7
+}
+
+define i64 @ldd_0(i64* %a) {
+; RV32XTHEADMEMPAIR-LABEL: ldd_0:
+; RV32XTHEADMEMPAIR: # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT: th.lwd a3, a4, (a0), 1, 3
+; RV32XTHEADMEMPAIR-NEXT: add a2, a2, a4
+; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a3
+; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT: add a1, a2, a1
+; RV32XTHEADMEMPAIR-NEXT: ret
+;
+; RV64XTHEADMEMPAIR-LABEL: ldd_0:
+; RV64XTHEADMEMPAIR: # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT: th.ldd a1, a2, (a0), 0, 4
+; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
+; RV64XTHEADMEMPAIR-NEXT: ret
+ %1 = getelementptr i64, i64* %a, i64 0
+ %2 = load i64, i64* %1, align 8
+ %3 = getelementptr i64, i64* %a, i64 1
+ %4 = load i64, i64* %3, align 8
+ %5 = add i64 %2, %4
+ ret i64 %5
+}
+
+define void @swd(i32* %a, i32 %b, i32%c) {
+; RV32XTHEADMEMPAIR-LABEL: swd:
+; RV32XTHEADMEMPAIR: # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT: th.swd a1, a2, (a0), 2, 3
+; RV32XTHEADMEMPAIR-NEXT: ret
+;
+; RV64XTHEADMEMPAIR-LABEL: swd:
+; RV64XTHEADMEMPAIR: # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT: th.swd a1, a2, (a0), 2, 3
+; RV64XTHEADMEMPAIR-NEXT: ret
+ %1 = getelementptr i32, i32* %a, i64 4
+ store i32 %b, i32* %1, align 4
+ %2 = getelementptr i32, i32* %a, i64 5
+ store i32 %c, i32* %2, align 4
+ ret void
+}
+
+define void @sdd(i64* %a, i64 %b, i64%c) {
+; RV32XTHEADMEMPAIR-LABEL: sdd:
+; RV32XTHEADMEMPAIR: # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT: sw a2, 36(a0)
+; RV32XTHEADMEMPAIR-NEXT: sw a1, 32(a0)
+; RV32XTHEADMEMPAIR-NEXT: sw a4, 44(a0)
+; RV32XTHEADMEMPAIR-NEXT: sw a3, 40(a0)
+; RV32XTHEADMEMPAIR-NEXT: ret
+;
+; RV64XTHEADMEMPAIR-LABEL: sdd:
+; RV64XTHEADMEMPAIR: # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT: th.sdd a1, a2, (a0), 2, 4
+; RV64XTHEADMEMPAIR-NEXT: ret
+ %1 = getelementptr i64, i64* %a, i64 4
+ store i64 %b, i64* %1, align 8
+ %2 = getelementptr i64, i64* %a, i64 5
+ store i64 %c, i64* %2, align 8
+ ret void
+}
+
+define void @swd_0(i32* %a, i32 %b, i32%c) {
+; RV32XTHEADMEMPAIR-LABEL: swd_0:
+; RV32XTHEADMEMPAIR: # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT: th.swd a1, a2, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT: ret
+;
+; RV64XTHEADMEMPAIR-LABEL: swd_0:
+; RV64XTHEADMEMPAIR: # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT: th.swd a1, a2, (a0), 0, 3
+; RV64XTHEADMEMPAIR-NEXT: ret
+ %1 = getelementptr i32, i32* %a, i64 0
+ store i32 %b, i32* %1, align 4
+ %2 = getelementptr i32, i32* %a, i64 1
+ store i32 %c, i32* %2, align 4
+ ret void
+}
+
+define void @sdd_0(i64* %a, i64 %b, i64%c) {
+; RV32XTHEADMEMPAIR-LABEL: sdd_0:
+; RV32XTHEADMEMPAIR: # %bb.0:
+; RV32XTHEADMEMPAIR-NEXT: th.swd a1, a2, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT: th.swd a3, a4, (a0), 1, 3
+; RV32XTHEADMEMPAIR-NEXT: ret
+;
+; RV64XTHEADMEMPAIR-LABEL: sdd_0:
+; RV64XTHEADMEMPAIR: # %bb.0:
+; RV64XTHEADMEMPAIR-NEXT: th.sdd a1, a2, (a0), 0, 4
+; RV64XTHEADMEMPAIR-NEXT: ret
+ %1 = getelementptr i64, i64* %a, i64 0
+ store i64 %b, i64* %1, align 8
+ %2 = getelementptr i64, i64* %a, i64 1
+ store i64 %c, i64* %2, align 8
+ ret void
+}
--- /dev/null
+# RUN: not llvm-mc -triple riscv32 -mattr=+xtheadmempair < %s 2>&1 | FileCheck %s
+
+th.ldd t0, t1, (t2), 5, 4 # CHECK: [[@LINE]]:22: error: invalid operand for instruction
+th.ldd t0, t1, (t2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.ldd t0, t1, (t2), 3, 5 # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
+th.sdd a0, a1, (a2), 5, 4 # CHECK: [[@LINE]]:22: error: invalid operand for instruction
+th.sdd a0, a1, (a2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.sdd a0, a1, (a2), 3, 5 # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
+th.lwud t0, t1, (t2), 5, 4 # CHECK: [[@LINE]]:23: error: immediate must be an integer in the range [0, 3]
+th.lwud t0, t1, (t2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.lwud t0, t1, (t2), 3, 5 # CHECK: [[@LINE]]:26: error: Operand must be constant 3.
+th.lwd a3, a4, (a5), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.lwd a3, a4, (a5) # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.lwd a3, a4, (a5), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
+th.swd t3, t4, (t5), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.swd t3, t4, (t5) # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.swd t3, t4, (t5), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
+th.lwud x6, x6, (x6), 2, 3 # CHECK: [[@LINE]]:9: error: The source register and destination registers cannot be equal.
+th.ldd t0, t1, (t2), 2, 4 # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
+th.sdd t0, t1, (t2), 2, 4 # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
--- /dev/null
+# With the XTHeadMemPair vendor extension:
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+xtheadmempair -show-encoding \
+# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+xtheadmempair < %s \
+# RUN: | llvm-objdump --mattr=+xtheadmempair -d -r - \
+# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: th.lwd
+# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xe2]
+th.lwd a0, a1, (a2), 1, 3
+
+# CHECK-ASM-AND-OBJ: th.lwud
+# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xf4]
+th.lwud a0, a1, (a2), 2, 3
+
+# CHECK-ASM-AND-OBJ: th.swd
+# CHECK-ASM: encoding: [0x0b,0x55,0xb6,0xe0]
+th.swd a0, a1, (a2), 0, 3
--- /dev/null
+# RUN: not llvm-mc -triple riscv64 -mattr=+xtheadmempair < %s 2>&1 | FileCheck %s
+
+th.ldd t0, t1, (t2), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.ldd t0, t1, (t2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.ldd t0, t1, (t2), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 4.
+th.sdd a0, a1, (a2), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.sdd a0, a1, (a2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.sdd a0, a1, (a2), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 4.
+th.lwud t0, t1, (t2), 5, 4 # CHECK: [[@LINE]]:23: error: immediate must be an integer in the range [0, 3]
+th.lwud t0, t1, (t2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.lwud t0, t1, (t2), 3, 5 # CHECK: [[@LINE]]:26: error: Operand must be constant 3.
+th.lwd a3, a4, (a5), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.lwd a3, a4, (a5) # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.lwd a3, a4, (a5), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
+th.swd t3, t4, (t5), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
+th.swd t3, t4, (t5) # CHECK: [[@LINE]]:1: error: too few operands for instruction
+th.swd t3, t4, (t5), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
+th.lwud x6, x6, (x6), 2, 3 # CHECK: [[@LINE]]:9: error: The source register and destination registers cannot be equal.
--- /dev/null
+# With the XTHeadMemPair vendor extension:
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+xtheadmempair -show-encoding \
+# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+xtheadmempair < %s \
+# RUN: | llvm-objdump --mattr=+xtheadmempair -d -r - \
+# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: th.lwd
+# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xe2]
+th.lwd a0, a1, (a2), 1, 3
+
+# CHECK-ASM-AND-OBJ: th.lwud
+# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xf4]
+th.lwud a0, a1, (a2), 2, 3
+
+# CHECK-ASM-AND-OBJ: th.swd
+# CHECK-ASM: encoding: [0x0b,0x55,0xb6,0xe0]
+th.swd a0, a1, (a2), 0, 3
+
+# CHECK-ASM-AND-OBJ: th.ldd
+# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xf8]
+th.ldd a0, a1, (a2), 0, 4
+
+# CHECK-ASM-AND-OBJ: th.sdd
+# CHECK-ASM: encoding: [0x0b,0x55,0xb6,0xfe]
+th.sdd a0, a1, (a2), 3, 4