From f6262201d8fd52260c205f7426cb22b18ee06ac7 Mon Sep 17 00:00:00 2001 From: Manolis Tsamis Date: Fri, 24 Feb 2023 00:04:08 +0100 Subject: [PATCH] [RISCV] Add vendor-defined XTheadMemIdx (Indexed Memory Operations) extension The vendor-defined XTHeadMemIdx (no comparable standard extension exists at the time of writing) extension adds indexed load/store instructions as well as load/store and update register instructions. It is supported by the C9xx cores (e.g., found in the wild in the Allwinner D1) by Alibaba T-Head. The current (as of this commit) public documentation for this extension is available at: https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf Support for these instructions has already landed in GNU Binutils: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=27cfd142d0a7e378d19aa9a1278e2137f849b71b Depends on D144002 Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D144249 --- llvm/docs/RISCVUsage.rst | 3 + llvm/docs/ReleaseNotes.rst | 2 + llvm/lib/Support/RISCVISAInfo.cpp | 1 + .../RISCV/Disassembler/RISCVDisassembler.cpp | 7 + llvm/lib/Target/RISCV/RISCVFeatures.td | 7 + llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 153 +++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 27 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 106 ++ llvm/lib/Target/RISCV/RISCVISelLowering.h | 10 + llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td | 261 +++++ llvm/test/CodeGen/RISCV/attributes.ll | 4 + llvm/test/CodeGen/RISCV/xtheadmemidx.ll | 1064 ++++++++++++++++++++ llvm/test/MC/RISCV/rv32xtheadmemidx-invalid.s | 14 + llvm/test/MC/RISCV/rv32xtheadmemidx-valid.s | 133 +++ llvm/test/MC/RISCV/rv64xtheadmemidx-invalid.s | 13 + llvm/test/MC/RISCV/rv64xtheadmemidx-valid.s | 181 ++++ 16 files changed, 1986 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/xtheadmemidx.ll create mode 100644 llvm/test/MC/RISCV/rv32xtheadmemidx-invalid.s create mode 100644 
llvm/test/MC/RISCV/rv32xtheadmemidx-valid.s create mode 100644 llvm/test/MC/RISCV/rv64xtheadmemidx-invalid.s create mode 100644 llvm/test/MC/RISCV/rv64xtheadmemidx-valid.s diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 5751211..45a4ac9 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -192,6 +192,9 @@ The current vendor extensions supported are: ``XTheadMac`` LLVM implements `the XTheadMac (multiply-accumulate instructions) vendor-defined instructions specified in `_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification. +``XTHeadMemIdx`` + LLVM implements `the THeadMemIdx (indexed memory operations) vendor-defined instructions specified in `_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification. + ``XTHeadMemPair`` LLVM implements `the THeadMemPair (two-GPR memory operations) vendor-defined instructions specified in `_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 6773424..82fad22 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -114,6 +114,8 @@ Changes to the RISC-V Backend * Adds support for the vendor-defined XTHeadMac (multiply-accumulate instructions) extension. * Added support for the vendor-defined XTHeadMemPair (two-GPR memory operations) extension disassembler/assembler. +* Added support for the vendor-defined XTHeadMemIdx (indexed memory operations) + extension disassembler/assembler. * Support for the now-ratified Zawrs extension is no longer experimental. * Adds support for the vendor-defined XTHeadCmo (cache management operations) extension. * Adds support for the vendor-defined XTHeadSync (multi-core synchronization instructions) extension. 
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index e051585..7cd92b4 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -118,6 +118,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"xtheadbs", RISCVExtensionVersion{1, 0}}, {"xtheadcmo", RISCVExtensionVersion{1, 0}}, {"xtheadmac", RISCVExtensionVersion{1, 0}}, + {"xtheadmemidx", RISCVExtensionVersion{1, 0}}, {"xtheadmempair", RISCVExtensionVersion{1, 0}}, {"xtheadsync", RISCVExtensionVersion{1, 0}}, {"xtheadvdot", RISCVExtensionVersion{1, 0}}, diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index e278edc..807d1e9 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -534,6 +534,13 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (Result != MCDisassembler::Fail) return Result; } + if (STI.hasFeature(RISCV::FeatureVendorXTHeadMemIdx)) { + LLVM_DEBUG(dbgs() << "Trying XTHeadMemIdx custom opcode table:\n"); + Result = decodeInstruction(DecoderTableTHeadMemIdx32, MI, Insn, Address, + this, STI); + if (Result != MCDisassembler::Fail) + return Result; + } if (STI.hasFeature(RISCV::FeatureVendorXTHeadMemPair)) { LLVM_DEBUG(dbgs() << "Trying XTHeadMemPair custom opcode table:\n"); Result = decodeInstruction(DecoderTableTHeadMemPair32, MI, Insn, Address, diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 5107f0d..9cd8636 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -519,6 +519,13 @@ def HasVendorXTHeadMac : Predicate<"Subtarget->hasVendorXTHeadMac()">, AssemblerPredicate<(all_of FeatureVendorXTHeadMac), "'xtheadmac' (T-Head Multiply-Accumulate Instructions)">; +def FeatureVendorXTHeadMemIdx + : SubtargetFeature<"xtheadmemidx", 
"HasVendorXTHeadMemIdx", "true", + "'xtheadmemidx' (T-Head Indexed Memory Operations)">; +def HasVendorXTHeadMemIdx : Predicate<"Subtarget->hasVendorXTHeadMemIdx()">, + AssemblerPredicate<(all_of FeatureVendorXTHeadMemIdx), + "'xtheadmemidx' (T-Head Indexed Memory Operations)">; + def FeatureVendorXTHeadMemPair : SubtargetFeature<"xtheadmempair", "HasVendorXTHeadMemPair", "true", "'xtheadmempair' (T-Head two-GPR Memory Operations)">; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 0798fcb..604ad1f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -728,6 +728,77 @@ bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) { return false; } +bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { + // Target does not support indexed loads. + if (!Subtarget->hasVendorXTHeadMemIdx()) + return false; + + LoadSDNode *Ld = cast(Node); + ISD::MemIndexedMode AM = Ld->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return false; + + const ConstantSDNode *C = dyn_cast(Ld->getOffset()); + if (!C) + return false; + + EVT LoadVT = Ld->getMemoryVT(); + bool IsPre = (AM == ISD::PRE_INC || AM == ISD::PRE_DEC); + bool IsPost = (AM == ISD::POST_INC || AM == ISD::POST_DEC); + int64_t Offset = C->getSExtValue(); + + // Convert decrements to increments by a negative quantity. + if (AM == ISD::PRE_DEC || AM == ISD::POST_DEC) + Offset = -Offset; + + // The constants that can be encoded in the THeadMemIdx instructions + // are of the form (sign_extend(imm5) << imm2). + int64_t Shift; + for (Shift = 0; Shift < 4; Shift++) + if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) + break; + + // Constant cannot be encoded. + if (Shift == 4) + return false; + + bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD); + unsigned Opcode; + if (LoadVT == MVT::i8 && IsPre) + Opcode = IsZExt ? 
RISCV::TH_LBUIB : RISCV::TH_LBIB; + else if (LoadVT == MVT::i8 && IsPost) + Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA; + else if (LoadVT == MVT::i16 && IsPre) + Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB; + else if (LoadVT == MVT::i16 && IsPost) + Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA; + else if (LoadVT == MVT::i32 && IsPre) + Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB; + else if (LoadVT == MVT::i32 && IsPost) + Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA; + else if (LoadVT == MVT::i64 && IsPre) + Opcode = RISCV::TH_LDIB; + else if (LoadVT == MVT::i64 && IsPost) + Opcode = RISCV::TH_LDIA; + else + return false; + + EVT Ty = Ld->getOffset().getValueType(); + SDValue Ops[] = {Ld->getBasePtr(), + CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty), + CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), + Ld->getChain()}; + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0), + Ld->getValueType(1), MVT::Other, Ops); + + MachineMemOperand *MemOp = cast(Node)->getMemOperand(); + CurDAG->setNodeMemRefs(cast(New), {MemOp}); + + ReplaceNode(Node, New); + + return true; +} + void RISCVDAGToDAGISel::Select(SDNode *Node) { // If we have a custom node, we have already selected. 
if (Node->isMachineOpcode()) { @@ -1272,6 +1343,11 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { ReplaceNode(Node, MULHU); return; } + case ISD::LOAD: { + if (tryIndexedLoad(Node)) + return; + break; + } case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = Node->getConstantOperandVal(0); switch (IntNo) { @@ -2136,6 +2212,60 @@ static bool isWorthFoldingAdd(SDValue Add) { return true; } +bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, + unsigned MaxShiftAmount, + SDValue &Base, SDValue &Index, + SDValue &Scale) { + EVT VT = Addr.getSimpleValueType(); + auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index, + SDValue &Shift) { + uint64_t ShiftAmt = 0; + Index = N; + + if (N.getOpcode() == ISD::SHL && isa(N.getOperand(1))) { + // Only match shifts by a value in range [0, MaxShiftAmount]. + if (N.getConstantOperandVal(1) <= MaxShiftAmount) { + Index = N.getOperand(0); + ShiftAmt = N.getConstantOperandVal(1); + } + } + + Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT); + return ShiftAmt != 0; + }; + + if (Addr.getOpcode() == ISD::ADD) { + if (auto *C1 = dyn_cast(Addr.getOperand(1))) { + SDValue AddrB = Addr.getOperand(0); + if (AddrB.getOpcode() == ISD::ADD && + UnwrapShl(AddrB.getOperand(0), Index, Scale) && + !isa(AddrB.getOperand(1)) && + isInt<12>(C1->getSExtValue())) { + // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) + SDValue C1Val = + CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT); + Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT, + AddrB.getOperand(1), C1Val), + 0); + return true; + } + } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) { + Base = Addr.getOperand(1); + return true; + } else { + UnwrapShl(Addr.getOperand(1), Index, Scale); + Base = Addr.getOperand(0); + return true; + } + } else if (UnwrapShl(Addr, Index, Scale)) { + EVT VT = Addr.getValueType(); + Base = CurDAG->getRegister(RISCV::X0, VT); + return true; + } + + return false; +} + bool 
RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) { if (SelectAddrFrameIndex(Addr, Base, Offset)) @@ -2631,6 +2761,29 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, return true; } +// Select a constant that can be represented as (sign_extend(imm5) << imm2). +bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5, + SDValue &Shl2) { + if (auto *C = dyn_cast(N)) { + int64_t Offset = C->getSExtValue(); + int64_t Shift; + for (Shift = 0; Shift < 4; Shift++) + if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) + break; + + // Constant cannot be encoded. + if (Shift == 4) + return false; + + EVT Ty = N->getValueType(0); + Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty); + Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty); + return true; + } + + return false; +} + // Select VL as a 5 bit immediate or a value that will become a register. This // allows us to choose betwen VSETIVLI or VSETVLI later. 
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 223282e..3e023f4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -50,8 +50,33 @@ public: bool SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, + SDValue &Base, SDValue &Index, SDValue &Scale); + + template + bool SelectAddrRegRegScale(SDValue Addr, SDValue &Base, SDValue &Index, + SDValue &Scale) { + return SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale); + } + + template + bool SelectAddrRegZextRegScale(SDValue Addr, SDValue &Base, SDValue &Index, + SDValue &Scale) { + if (SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale)) { + if (Index.getOpcode() == ISD::AND) { + auto *C = dyn_cast(Index.getOperand(1)); + if (C && C->getZExtValue() == maskTrailingOnes(Bits)) { + Index = Index.getOperand(0); + return true; + } + } + } + return false; + } + bool tryShrinkShlLogicImm(SDNode *Node); bool trySignedBitfieldExtract(SDNode *Node); + bool tryIndexedLoad(SDNode *Node); bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); bool selectShiftMaskXLen(SDValue N, SDValue &ShAmt) { @@ -85,6 +110,8 @@ public: bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); } bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); } + bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2); + bool selectVLOp(SDValue N, SDValue &VL); bool selectVSplat(SDValue N, SDValue &SplatVal); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index cb081b7..3b95ff3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1037,6 +1037,23 @@ 
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, XLenVT, Expand); } + if (Subtarget.hasVendorXTHeadMemIdx()) { + for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC; + ++im) { + setIndexedLoadAction(im, MVT::i8, Legal); + setIndexedStoreAction(im, MVT::i8, Legal); + setIndexedLoadAction(im, MVT::i16, Legal); + setIndexedStoreAction(im, MVT::i16, Legal); + setIndexedLoadAction(im, MVT::i32, Legal); + setIndexedStoreAction(im, MVT::i32, Legal); + + if (Subtarget.is64Bit()) { + setIndexedLoadAction(im, MVT::i64, Legal); + setIndexedStoreAction(im, MVT::i64, Legal); + } + } + } + // Function alignments. const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4); setMinFunctionAlignment(FunctionAlignment); @@ -14431,6 +14448,95 @@ bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { return true; } +bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + bool &IsInc, + SelectionDAG &DAG) const { + // Target does not support indexed loads. + if (!Subtarget.hasVendorXTHeadMemIdx()) + return false; + + if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) + return false; + + Base = Op->getOperand(0); + if (ConstantSDNode *RHS = dyn_cast(Op->getOperand(1))) { + int64_t RHSC = RHS->getSExtValue(); + if (Op->getOpcode() == ISD::SUB) + RHSC = -(uint64_t)RHSC; + + // The constants that can be encoded in the THeadMemIdx instructions + // are of the form (sign_extend(imm5) << imm2). 
+ bool isLegalIndexedOffset = false; + for (unsigned i = 0; i < 4; i++) + if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) { + isLegalIndexedOffset = true; + break; + } + + if (!isLegalIndexedOffset) + return false; + + IsInc = (Op->getOpcode() == ISD::ADD); + Offset = Op->getOperand(1); + return true; + } + + return false; +} + +bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + EVT VT; + SDValue Ptr; + if (LoadSDNode *LD = dyn_cast(N)) { + VT = LD->getMemoryVT(); + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast(N)) { + VT = ST->getMemoryVT(); + Ptr = ST->getBasePtr(); + } else + return false; + + bool IsInc; + if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG)) + return false; + + AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC; + return true; +} + +bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + EVT VT; + SDValue Ptr; + if (LoadSDNode *LD = dyn_cast(N)) { + VT = LD->getMemoryVT(); + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast(N)) { + VT = ST->getMemoryVT(); + Ptr = ST->getBasePtr(); + } else + return false; + + bool IsInc; + if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG)) + return false; + // Post-indexing updates the base, so it's not a valid transform + // if that's not the same as the load's pointer. + if (Ptr != Base) + return false; + + AM = IsInc ? 
ISD::POST_INC : ISD::POST_DEC; + return true; +} + bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { EVT SVT = VT.getScalarType(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 8c96c99..ea71319 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -638,6 +638,16 @@ public: bool isVScaleKnownToBeAPowerOfTwo() const override; + bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, bool &IsInc, + SelectionDAG &DAG) const; + bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; + bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, + SDValue &Offset, ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; + bool isLegalScaleForGatherScatter(uint64_t Scale, uint64_t ElemSize) const override { // Scaled addressing not supported on indexed load/stores diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td index 91ee3f2..41a79a2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td @@ -164,6 +164,52 @@ class THCacheInst_void funct5, string opcodestr> let rs2 = funct5; } +let Predicates = [HasVendorXTHeadMemIdx], DecoderNamespace = "THeadMemIdx", + hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { +class THLoadIndexed funct5, string opcodestr> + : RVInstR { + bits<2> uimm2; + let Inst{26-25} = uimm2; +} + +class THLoadUpdate funct5, string opcodestr> + : RVInstI<0b100, OPC_CUSTOM_0, (outs GPR:$rd, GPR:$rs1_wb), + (ins GPR:$rs1, simm5:$simm5, uimm2:$uimm2), + opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> { + bits<5> simm5; + bits<2> uimm2; + let imm12{11-7} = funct5; + let imm12{6-5} = uimm2; + let imm12{4-0} = simm5; + let Constraints = "@earlyclobber $rd, $rs1_wb 
= $rs1"; +} +} + +let Predicates = [HasVendorXTHeadMemIdx], DecoderNamespace = "THeadMemIdx", + hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { +class THStoreIndexed funct5, string opcodestr> + : RVInstR { + bits<2> uimm2; + let Inst{26-25} = uimm2; +} + +class THStoreUpdate funct5, string opcodestr> + : RVInstI<0b101, OPC_CUSTOM_0, (outs GPR:$rs1_up), + (ins GPR:$rd, GPR:$rs1, simm5:$simm5, uimm2:$uimm2), + opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> { + bits<5> simm5; + bits<2> uimm2; + let imm12{11-7} = funct5; + let imm12{6-5} = uimm2; + let imm12{4-0} = simm5; + let Constraints = "$rs1_up = $rs1"; +} +} + //===----------------------------------------------------------------------===// // Combination of instruction classes. // Use these multiclasses to define instructions more easily. @@ -238,6 +284,120 @@ def TH_SDD : THStorePair<0b11111, "th.sdd">, Sched<[WriteSTD, WriteSTD, ReadStoreData, ReadMemBase]>; } +let Predicates = [HasVendorXTHeadMemIdx] in { +// T-Head Load/Store + Update instructions. 
+def TH_LBIA : THLoadUpdate<0b00011, "th.lbia">, + Sched<[WriteLDB, ReadMemBase]>; +def TH_LBIB : THLoadUpdate<0b00001, "th.lbib">, + Sched<[WriteLDB, ReadMemBase]>; +def TH_LBUIA : THLoadUpdate<0b10011, "th.lbuia">, + Sched<[WriteLDB, ReadMemBase]>; +def TH_LBUIB : THLoadUpdate<0b10001, "th.lbuib">, + Sched<[WriteLDB, ReadMemBase]>; + +def TH_LHIA : THLoadUpdate<0b00111, "th.lhia">, + Sched<[WriteLDH, ReadMemBase]>; +def TH_LHIB : THLoadUpdate<0b00101, "th.lhib">, + Sched<[WriteLDH, ReadMemBase]>; +def TH_LHUIA : THLoadUpdate<0b10111, "th.lhuia">, + Sched<[WriteLDH, ReadMemBase]>; +def TH_LHUIB : THLoadUpdate<0b10101, "th.lhuib">, + Sched<[WriteLDH, ReadMemBase]>; + +def TH_LWIA : THLoadUpdate<0b01011, "th.lwia">, + Sched<[WriteLDW, ReadMemBase]>; +def TH_LWIB : THLoadUpdate<0b01001, "th.lwib">, + Sched<[WriteLDW, ReadMemBase]>; + +def TH_SBIA : THStoreUpdate<0b00011, "th.sbia">, + Sched<[WriteSTB, ReadStoreData, ReadMemBase]>; +def TH_SBIB : THStoreUpdate<0b00001, "th.sbib">, + Sched<[WriteSTB, ReadStoreData, ReadMemBase]>; + +def TH_SHIA : THStoreUpdate<0b00111, "th.shia">, + Sched<[WriteSTH, ReadStoreData, ReadMemBase]>; +def TH_SHIB : THStoreUpdate<0b00101, "th.shib">, + Sched<[WriteSTH, ReadStoreData, ReadMemBase]>; + +def TH_SWIA : THStoreUpdate<0b01011, "th.swia">, + Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; +def TH_SWIB : THStoreUpdate<0b01001, "th.swib">, + Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; + +// T-Head Load/Store Indexed instructions. 
+def TH_LRB : THLoadIndexed, + Sched<[WriteLDB, ReadMemBase]>; +def TH_LRBU : THLoadIndexed, + Sched<[WriteLDB, ReadMemBase]>; +def TH_LURB : THLoadIndexed, + Sched<[WriteLDB, ReadMemBase]>; +def TH_LURBU : THLoadIndexed, + Sched<[WriteLDB, ReadMemBase]>; + +def TH_LRH : THLoadIndexed, + Sched<[WriteLDH, ReadMemBase]>; +def TH_LRHU : THLoadIndexed, + Sched<[WriteLDH, ReadMemBase]>; +def TH_LURH : THLoadIndexed, + Sched<[WriteLDB, ReadMemBase]>; +def TH_LURHU : THLoadIndexed, + Sched<[WriteLDB, ReadMemBase]>; + +def TH_LRW : THLoadIndexed, + Sched<[WriteLDW, ReadMemBase]>; +def TH_LURW : THLoadIndexed, + Sched<[WriteLDB, ReadMemBase]>; + +def TH_SRB : THStoreIndexed, + Sched<[WriteSTB, ReadStoreData, ReadMemBase]>; +def TH_SURB : THStoreIndexed, + Sched<[WriteLDB, ReadMemBase]>; + +def TH_SRH : THStoreIndexed, + Sched<[WriteSTH, ReadStoreData, ReadMemBase]>; +def TH_SURH : THStoreIndexed, + Sched<[WriteLDB, ReadMemBase]>; + +def TH_SRW : THStoreIndexed, + Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; +def TH_SURW : THStoreIndexed, + Sched<[WriteLDB, ReadMemBase]>; +} + +let Predicates = [HasVendorXTHeadMemIdx, IsRV64] in { +// T-Head Load/Store + Update instructions. +def TH_LWUIA : THLoadUpdate<0b11011, "th.lwuia">, + Sched<[WriteLDH, ReadMemBase]>; +def TH_LWUIB : THLoadUpdate<0b11001, "th.lwuib">, + Sched<[WriteLDH, ReadMemBase]>; + +def TH_LDIA : THLoadUpdate<0b01111, "th.ldia">, + Sched<[WriteLDW, ReadMemBase]>; +def TH_LDIB : THLoadUpdate<0b01101, "th.ldib">, + Sched<[WriteLDW, ReadMemBase]>; + +def TH_SDIA : THStoreUpdate<0b01111, "th.sdia">, + Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; +def TH_SDIB : THStoreUpdate<0b01101, "th.sdib">, + Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; + +// T-Head Load/Store Indexed instructions. 
+def TH_LRWU : THLoadIndexed, + Sched<[WriteLDW, ReadMemBase]>; +def TH_LURWU : THLoadIndexed, + Sched<[WriteLDB, ReadMemBase]>; + +def TH_LRD : THLoadIndexed, + Sched<[WriteLDW, ReadMemBase]>; +def TH_LURD : THLoadIndexed, + Sched<[WriteLDB, ReadMemBase]>; + +def TH_SRD : THStoreIndexed, + Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; +def TH_SURD : THStoreIndexed, + Sched<[WriteLDB, ReadMemBase]>; +} + let Predicates = [HasVendorXTHeadVdot], Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { @@ -538,3 +698,104 @@ def TH_SYNC_S : THCacheInst_void<0b11001, "th.sync.s">; def TH_SYNC_I : THCacheInst_void<0b11010, "th.sync.i">; def TH_SYNC_IS : THCacheInst_void<0b11011, "th.sync.is">; } + +def AddrRegRegScale : ComplexPattern">; +def AddrRegZextRegScale + : ComplexPattern", + [], [], 10>; + +multiclass LdIdxPat { +def : Pat<(vt (LoadOp (AddrRegRegScale GPR:$rs1, GPR:$rs2, uimm2:$uimm2))), + (Inst GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>; +} + +multiclass LdZextIdxPat { +def : Pat<(vt (LoadOp (AddrRegZextRegScale GPR:$rs1, GPR:$rs2, uimm2:$uimm2))), + (Inst GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>; +} + +multiclass StIdxPat { +def : Pat<(StoreOp (vt StTy:$rd), + (AddrRegRegScale GPR:$rs1, GPR:$rs2, uimm2:$uimm2)), + (Inst StTy:$rd, GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>; +} + +multiclass StZextIdxPat { +def : Pat<(StoreOp (vt StTy:$rd), + (AddrRegZextRegScale GPR:$rs1, GPR:$rs2, uimm2:$uimm2)), + (Inst StTy:$rd, GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>; +} + +let Predicates = [HasVendorXTHeadMemIdx] in { +defm : LdIdxPat; +defm : LdIdxPat; +defm : LdIdxPat; + +defm : LdIdxPat; +defm : LdIdxPat; +defm : LdIdxPat; + +defm : StIdxPat; +defm : StIdxPat; +} + +let Predicates = [HasVendorXTHeadMemIdx, IsRV32] in { +defm : LdIdxPat; +defm : StIdxPat; +} + +let Predicates = [HasVendorXTHeadMemIdx, IsRV64] in { +defm : LdZextIdxPat; +defm : LdZextIdxPat; +defm : LdZextIdxPat; + +defm : LdZextIdxPat; +defm : LdZextIdxPat; +defm : LdZextIdxPat; + +defm : LdIdxPat; +defm : LdIdxPat; +defm 
: LdIdxPat; + +defm : LdZextIdxPat; +defm : LdZextIdxPat; +defm : LdZextIdxPat; + +defm : LdIdxPat; +defm : LdZextIdxPat; + +defm : StZextIdxPat; +defm : StZextIdxPat; +defm : StIdxPat; +defm : StZextIdxPat; +defm : StIdxPat; +defm : StZextIdxPat; +} + +def simm5shl2 : ComplexPattern; + +multiclass StoreUpdatePat { +def : Pat<(st (vt GPR:$rd), GPR:$rs1, (simm5shl2 simm5:$simm5, uimm2:$uimm2)), + (Inst GPR:$rd, GPR:$rs1, simm5:$simm5, uimm2:$uimm2)>; +} + +let Predicates = [HasVendorXTHeadMemIdx] in { +defm : StoreUpdatePat; +defm : StoreUpdatePat; +defm : StoreUpdatePat; +defm : StoreUpdatePat; +} + +let Predicates = [HasVendorXTHeadMemIdx, IsRV32] in { +defm : StoreUpdatePat; +defm : StoreUpdatePat; +} + +let Predicates = [HasVendorXTHeadMemIdx, IsRV64] in { +defm : StoreUpdatePat; +defm : StoreUpdatePat; +defm : StoreUpdatePat; +defm : StoreUpdatePat; +} diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 352b73f..a0daf0c 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -43,6 +43,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+svinval %s -o - | FileCheck --check-prefixes=CHECK,RV32SVINVAL %s ; RUN: llc -mtriple=riscv32 -mattr=+xtheadcmo %s -o - | FileCheck --check-prefix=RV32XTHEADCMO %s ; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac %s -o - | FileCheck --check-prefixes=CHECK,RV32XTHEADMAC %s +; RUN: llc -mtriple=riscv32 -mattr=+xtheadmemidx %s -o - | FileCheck --check-prefix=RV32XTHEADMEMIDX %s ; RUN: llc -mtriple=riscv32 -mattr=+xtheadmempair %s -o - | FileCheck --check-prefix=RV32XTHEADMEMPAIR %s ; RUN: llc -mtriple=riscv32 -mattr=+xtheadsync %s -o - | FileCheck --check-prefix=RV32XTHEADSYNC %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zca %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCA %s @@ -100,6 +101,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+xtheadbs %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBS %s ; RUN: llc -mtriple=riscv64 -mattr=+xtheadcmo %s -o - | 
FileCheck --check-prefix=RV64XTHEADCMO %s ; RUN: llc -mtriple=riscv64 -mattr=+xtheadmac %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADMAC %s +; RUN: llc -mtriple=riscv64 -mattr=+xtheadmemidx %s -o - | FileCheck --check-prefix=RV64XTHEADMEMIDX %s ; RUN: llc -mtriple=riscv64 -mattr=+xtheadmempair %s -o - | FileCheck --check-prefix=RV64XTHEADMEMPAIR %s ; RUN: llc -mtriple=riscv64 -mattr=+xtheadsync %s -o - | FileCheck --check-prefix=RV64XTHEADSYNC %s ; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADVDOT %s @@ -156,6 +158,7 @@ ; RV32SVINVAL: .attribute 5, "rv32i2p0_svinval1p0" ; RV32XTHEADCMO: .attribute 5, "rv32i2p0_xtheadcmo1p0" ; RV32XTHEADMAC: .attribute 5, "rv32i2p0_xtheadmac1p0" +; RV32XTHEADMEMIDX: .attribute 5, "rv32i2p0_xtheadmemidx1p0" ; RV32XTHEADMEMPAIR: .attribute 5, "rv32i2p0_xtheadmempair1p0" ; RV32XTHEADSYNC: .attribute 5, "rv32i2p0_xtheadsync1p0" ; RV32ZCA: .attribute 5, "rv32i2p0_zca1p0" @@ -213,6 +216,7 @@ ; RV64XTHEADBS: .attribute 5, "rv64i2p0_xtheadbs1p0" ; RV64XTHEADCMO: .attribute 5, "rv64i2p0_xtheadcmo1p0" ; RV64XTHEADMAC: .attribute 5, "rv64i2p0_xtheadmac1p0" +; RV64XTHEADMEMIDX: .attribute 5, "rv64i2p0_xtheadmemidx1p0" ; RV64XTHEADMEMPAIR: .attribute 5, "rv64i2p0_xtheadmempair1p0" ; RV64XTHEADSYNC: .attribute 5, "rv64i2p0_xtheadsync1p0" ; RV64XTHEADVDOT: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xtheadvdot1p0" diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll new file mode 100644 index 0000000..74fd31b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll @@ -0,0 +1,1064 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d -mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32XTHEADMEMIDX +; RUN: llc -mtriple=riscv64 -mattr=+d 
-mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64XTHEADMEMIDX + +define i8* @lbia(i8* %base, i8* %addr.2, i8 %a) { +; RV32XTHEADMEMIDX-LABEL: lbia: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lbia a3, (a0), -1, 0 +; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2 +; RV32XTHEADMEMIDX-NEXT: sb a2, 0(a1) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lbia: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lbia a3, (a0), -1, 0 +; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2 +; RV64XTHEADMEMIDX-NEXT: sb a2, 0(a1) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i8, i8* %base, i8 0 + %ld = load i8, i8* %addr + %addr.1 = getelementptr i8, i8* %base, i8 -1 + %res = add i8 %ld, %a + store i8 %res, i8* %addr.2 + ret i8* %addr.1 +} + +define i8* @lbib(i8* %base, i8 %a) { +; RV32XTHEADMEMIDX-LABEL: lbib: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lbib a2, (a0), 1, 0 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: sb a1, 1(a0) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lbib: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lbib a2, (a0), 1, 0 +; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV64XTHEADMEMIDX-NEXT: sb a1, 1(a0) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i8, i8* %base, i8 1 + %ld = load i8, i8* %addr + %addr.1 = getelementptr i8, i8* %base, i8 2 + %res = add i8 %ld, %a + store i8 %res, i8* %addr.1 + ret i8* %addr +} + +define i8* @lbuia(i8* %base, i64* %addr.2, i64 %a) { +; RV32XTHEADMEMIDX-LABEL: lbuia: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lbuia a4, (a0), -1, 0 +; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2 +; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a4 +; RV32XTHEADMEMIDX-NEXT: add a3, a3, a4 +; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1) +; RV32XTHEADMEMIDX-NEXT: sw a3, 4(a1) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lbuia: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lbuia a3, (a0), 
-1, 0 +; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2 +; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i8, i8* %base, i8 0 + %ld = load i8, i8* %addr + %zext = zext i8 %ld to i64 + %addr.1 = getelementptr i8, i8* %base, i8 -1 + %res = add i64 %zext, %a + store i64 %res, i64* %addr.2 + ret i8* %addr.1 +} + +define i8* @lbuib(i8* %base, i64 %a, i64* %addr.1) { +; RV32XTHEADMEMIDX-LABEL: lbuib: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lbuib a4, (a0), 1, 0 +; RV32XTHEADMEMIDX-NEXT: add a1, a4, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a4, a1, a4 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a4 +; RV32XTHEADMEMIDX-NEXT: sw a1, 0(a3) +; RV32XTHEADMEMIDX-NEXT: sw a2, 4(a3) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lbuib: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lbuib a3, (a0), 1, 0 +; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1 +; RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i8, i8* %base, i8 1 + %ld = load i8, i8* %addr + %zext = zext i8 %ld to i64 + %res = add i64 %zext, %a + store i64 %res, i64* %addr.1 + ret i8* %addr +} + +define i16* @lhia(i16* %base, i16* %addr.2, i16 %a) { +; RV32XTHEADMEMIDX-LABEL: lhia: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lhia a3, (a0), -16, 1 +; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2 +; RV32XTHEADMEMIDX-NEXT: sh a2, 0(a1) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lhia: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lhia a3, (a0), -16, 1 +; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2 +; RV64XTHEADMEMIDX-NEXT: sh a2, 0(a1) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i16, i16* %base, i16 0 + %ld = load i16, i16* %addr + %addr.1 = getelementptr i16, i16* %base, i16 -16 + %res = add i16 %ld, %a + store i16 %res, i16* %addr.2 + ret i16* %addr.1 +} + +define i16* @lhib(i16* %base, i16 %a) { +; RV32XTHEADMEMIDX-LABEL: lhib: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: 
th.lhib a2, (a0), 2, 0 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: sh a1, 2(a0) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lhib: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lhib a2, (a0), 2, 0 +; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV64XTHEADMEMIDX-NEXT: sh a1, 2(a0) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i16, i16* %base, i16 1 + %ld = load i16, i16* %addr + %addr.1 = getelementptr i16, i16* %base, i16 2 + %res = add i16 %ld, %a + store i16 %res, i16* %addr.1 + ret i16* %addr +} + +define i16* @lhuia(i16* %base, i64* %addr.2, i64 %a) { +; RV32XTHEADMEMIDX-LABEL: lhuia: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lhuia a4, (a0), -16, 1 +; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2 +; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a4 +; RV32XTHEADMEMIDX-NEXT: add a3, a3, a4 +; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1) +; RV32XTHEADMEMIDX-NEXT: sw a3, 4(a1) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lhuia: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lhuia a3, (a0), -16, 1 +; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2 +; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i16, i16* %base, i16 0 + %ld = load i16, i16* %addr + %zext = zext i16 %ld to i64 + %addr.1 = getelementptr i16, i16* %base, i16 -16 + %res = add i64 %zext, %a + store i64 %res, i64* %addr.2 + ret i16* %addr.1 +} + +define i16* @lhuib(i16* %base, i64 %a, i64* %addr.1) { +; RV32XTHEADMEMIDX-LABEL: lhuib: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lhuib a4, (a0), 2, 0 +; RV32XTHEADMEMIDX-NEXT: add a1, a4, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a4, a1, a4 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a4 +; RV32XTHEADMEMIDX-NEXT: sw a1, 0(a3) +; RV32XTHEADMEMIDX-NEXT: sw a2, 4(a3) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lhuib: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lhuib a3, (a0), 2, 0 +; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1 +; 
RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i16, i16* %base, i16 1 + %ld = load i16, i16* %addr + %zext = zext i16 %ld to i64 + %res = add i64 %zext, %a + store i64 %res, i64* %addr.1 + ret i16* %addr +} + +define i32* @lwia(i32* %base, i32* %addr.2, i32 %a) { +; RV32XTHEADMEMIDX-LABEL: lwia: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lwia a3, (a0), -16, 2 +; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2 +; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lwia: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lwia a3, (a0), -16, 2 +; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2 +; RV64XTHEADMEMIDX-NEXT: sw a2, 0(a1) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i32, i32* %base, i32 0 + %ld = load i32, i32* %addr + %addr.1 = getelementptr i32, i32* %base, i32 -16 + %res = add i32 %ld, %a + store i32 %res, i32* %addr.2 + ret i32* %addr.1 +} + +define i32* @lwib(i32* %base, i32 %a) { +; RV32XTHEADMEMIDX-LABEL: lwib: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lwib a2, (a0), 4, 0 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: sw a1, 4(a0) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lwib: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lwib a2, (a0), 4, 0 +; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV64XTHEADMEMIDX-NEXT: sw a1, 4(a0) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i32, i32* %base, i32 1 + %ld = load i32, i32* %addr + %addr.1 = getelementptr i32, i32* %base, i32 2 + %res = add i32 %ld, %a + store i32 %res, i32* %addr.1 + ret i32* %addr +} + +define i32* @lwuia(i32* %base, i64* %addr.2, i64 %a) { +; RV32XTHEADMEMIDX-LABEL: lwuia: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lwia a4, (a0), -16, 2 +; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2 +; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a4 +; RV32XTHEADMEMIDX-NEXT: add a3, a3, a4 +; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1) +; 
RV32XTHEADMEMIDX-NEXT: sw a3, 4(a1) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lwuia: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lwuia a3, (a0), -16, 2 +; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2 +; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i32, i32* %base, i32 0 + %ld = load i32, i32* %addr + %zext = zext i32 %ld to i64 + %addr.1 = getelementptr i32, i32* %base, i32 -16 + %res = add i64 %zext, %a + store i64 %res, i64* %addr.2 + ret i32* %addr.1 +} + +define i32* @lwuib(i32* %base, i64 %a, i64* %addr.1) { +; RV32XTHEADMEMIDX-LABEL: lwuib: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lwib a4, (a0), 4, 0 +; RV32XTHEADMEMIDX-NEXT: add a1, a4, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a4, a1, a4 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a4 +; RV32XTHEADMEMIDX-NEXT: sw a1, 0(a3) +; RV32XTHEADMEMIDX-NEXT: sw a2, 4(a3) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lwuib: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lwuib a3, (a0), 4, 0 +; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1 +; RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i32, i32* %base, i32 1 + %ld = load i32, i32* %addr + %zext = zext i32 %ld to i64 + %res = add i64 %zext, %a + store i64 %res, i64* %addr.1 + ret i32* %addr +} + +define i64* @ldia(i64* %base, i64* %addr.2, i64 %a) { +; RV32XTHEADMEMIDX-LABEL: ldia: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: lw a4, 4(a0) +; RV32XTHEADMEMIDX-NEXT: lw a5, 0(a0) +; RV32XTHEADMEMIDX-NEXT: addi a0, a0, -128 +; RV32XTHEADMEMIDX-NEXT: add a3, a4, a3 +; RV32XTHEADMEMIDX-NEXT: add a2, a5, a2 +; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a5 +; RV32XTHEADMEMIDX-NEXT: add a3, a3, a4 +; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1) +; RV32XTHEADMEMIDX-NEXT: sw a3, 4(a1) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: ldia: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.ldia a3, (a0), -16, 3 +; 
RV64XTHEADMEMIDX-NEXT: add a2, a3, a2 +; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i64, i64* %base, i64 0 + %ld = load i64, i64* %addr + %addr.1 = getelementptr i64, i64* %base, i64 -16 + %res = add i64 %ld, %a + store i64 %res, i64* %addr.2 + ret i64* %addr.1 +} + +define i64* @ldib(i64* %base, i64 %a) { +; RV32XTHEADMEMIDX-LABEL: ldib: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lwib a3, (a0), 8, 0 +; RV32XTHEADMEMIDX-NEXT: lw a4, 4(a0) +; RV32XTHEADMEMIDX-NEXT: add a1, a3, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a3, a1, a3 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a3 +; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2 +; RV32XTHEADMEMIDX-NEXT: sw a1, 8(a0) +; RV32XTHEADMEMIDX-NEXT: sw a2, 12(a0) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: ldib: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.ldib a2, (a0), 8, 0 +; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV64XTHEADMEMIDX-NEXT: sd a1, 8(a0) +; RV64XTHEADMEMIDX-NEXT: ret + %addr = getelementptr i64, i64* %base, i64 1 + %ld = load i64, i64* %addr + %addr.1 = getelementptr i64, i64* %base, i64 2 + %res = add i64 %ld, %a + store i64 %res, i64* %addr.1 + ret i64* %addr +} + +define i8* @sbia(i8* %base, i8 %a, i8 %b) { +; RV32XTHEADMEMIDX-LABEL: sbia: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV32XTHEADMEMIDX-NEXT: th.sbia a1, (a0), 1, 0 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: sbia: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV64XTHEADMEMIDX-NEXT: th.sbia a1, (a0), 1, 0 +; RV64XTHEADMEMIDX-NEXT: ret + %addr.1 = getelementptr i8, i8* %base, i8 1 + %res = add i8 %a, %b + store i8 %res, i8* %base + ret i8* %addr.1 +} + +define i8* @sbib(i8* %base, i8 %a, i8 %b) { +; RV32XTHEADMEMIDX-LABEL: sbib: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV32XTHEADMEMIDX-NEXT: th.sbib a1, (a0), 1, 0 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: 
sbib: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV64XTHEADMEMIDX-NEXT: th.sbib a1, (a0), 1, 0 +; RV64XTHEADMEMIDX-NEXT: ret + %addr.1 = getelementptr i8, i8* %base, i8 1 + %res = add i8 %a, %b + store i8 %res, i8* %addr.1 + ret i8* %addr.1 +} + +define i16* @shia(i16* %base, i16 %a, i16 %b) { +; RV32XTHEADMEMIDX-LABEL: shia: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV32XTHEADMEMIDX-NEXT: th.shia a1, (a0), -9, 1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: shia: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV64XTHEADMEMIDX-NEXT: th.shia a1, (a0), -9, 1 +; RV64XTHEADMEMIDX-NEXT: ret + %addr.1 = getelementptr i16, i16* %base, i16 -9 + %res = add i16 %a, %b + store i16 %res, i16* %base + ret i16* %addr.1 +} + +define i16* @shib(i16* %base, i16 %a, i16 %b) { +; RV32XTHEADMEMIDX-LABEL: shib: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV32XTHEADMEMIDX-NEXT: th.shib a1, (a0), 2, 0 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: shib: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV64XTHEADMEMIDX-NEXT: th.shib a1, (a0), 2, 0 +; RV64XTHEADMEMIDX-NEXT: ret + %addr.1 = getelementptr i16, i16* %base, i16 1 + %res = add i16 %a, %b + store i16 %res, i16* %addr.1 + ret i16* %addr.1 +} + +define i32* @swia(i32* %base, i32 %a, i32 %b) { +; RV32XTHEADMEMIDX-LABEL: swia: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV32XTHEADMEMIDX-NEXT: th.swia a1, (a0), 8, 2 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: swia: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV64XTHEADMEMIDX-NEXT: th.swia a1, (a0), 8, 2 +; RV64XTHEADMEMIDX-NEXT: ret + %addr.1 = getelementptr i32, i32* %base, i32 8 + %res = add i32 %a, %b + store i32 %res, i32* %base + ret i32* %addr.1 +} + +define i32* @swib(i32* %base, i32 %a, i32 %b) { +; RV32XTHEADMEMIDX-LABEL: swib: +; 
RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV32XTHEADMEMIDX-NEXT: th.swib a1, (a0), -13, 3 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: swib: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV64XTHEADMEMIDX-NEXT: th.swib a1, (a0), -13, 3 +; RV64XTHEADMEMIDX-NEXT: ret + %addr.1 = getelementptr i32, i32* %base, i32 -26 + %res = add i32 %a, %b + store i32 %res, i32* %addr.1 + ret i32* %addr.1 +} + +define i64* @sdia(i64* %base, i64 %a, i64 %b) { +; RV32XTHEADMEMIDX-LABEL: sdia: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: addi a5, a0, 64 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a4 +; RV32XTHEADMEMIDX-NEXT: add a3, a1, a3 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a3, a1 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: sw a3, 0(a0) +; RV32XTHEADMEMIDX-NEXT: sw a1, 4(a0) +; RV32XTHEADMEMIDX-NEXT: mv a0, a5 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: sdia: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV64XTHEADMEMIDX-NEXT: th.sdia a1, (a0), 8, 3 +; RV64XTHEADMEMIDX-NEXT: ret + %addr.1 = getelementptr i64, i64* %base, i64 8 + %res = add i64 %a, %b + store i64 %res, i64* %base + ret i64* %addr.1 +} + +define i64* @sdib(i64* %base, i64 %a, i64 %b) { +; RV32XTHEADMEMIDX-LABEL: sdib: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a4 +; RV32XTHEADMEMIDX-NEXT: add a3, a1, a3 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a3, a1 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: th.swib a3, (a0), 8, 0 +; RV32XTHEADMEMIDX-NEXT: sw a1, 4(a0) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: sdib: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV64XTHEADMEMIDX-NEXT: th.sdib a1, (a0), 8, 0 +; RV64XTHEADMEMIDX-NEXT: ret + %addr.1 = getelementptr i64, i64* %base, i64 1 + %res = add i64 %a, %b + store i64 %res, i64* %addr.1 + ret i64* %addr.1 +} + +define i64 @lrb(i8* %a, i64 %b) { +; 
RV32XTHEADMEMIDX-LABEL: lrb: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrb a1, a0, a1, 0 +; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lrb: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = getelementptr i8, i8* %a, i64 %b + %2 = load i8, i8* %1, align 1 + %3 = sext i8 %2 to i64 + %4 = add i64 %3, %3 + ret i64 %4 +} + +define i64 @lurb(i8* %a, i32 %b) { +; RV32XTHEADMEMIDX-LABEL: lurb: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrb a1, a0, a1, 0 +; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lurb: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lurb a0, a0, a1, 0 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = zext i32 %b to i64 + %2 = getelementptr i8, i8* %a, i64 %1 + %3 = load i8, i8* %2, align 1 + %4 = sext i8 %3 to i64 + %5 = add i64 %4, %4 + ret i64 %5 +} + +define i64 @lrbu(i8* %a, i64 %b) { +; RV32XTHEADMEMIDX-LABEL: lrbu: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrbu a1, a0, a1, 0 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lrbu: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lrbu a0, a0, a1, 0 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = getelementptr i8, i8* %a, i64 %b + %2 = load i8, i8* %1, align 1 + %3 = zext i8 %2 to i64 + %4 = add i64 %3, %3 + ret i64 %4 +} + +define i64 @lurbu(i8* 
%a, i32 %b) { +; RV32XTHEADMEMIDX-LABEL: lurbu: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrbu a1, a0, a1, 0 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lurbu: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lurbu a0, a0, a1, 0 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = zext i32 %b to i64 + %2 = getelementptr i8, i8* %a, i64 %1 + %3 = load i8, i8* %2, align 1 + %4 = zext i8 %3 to i64 + %5 = add i64 %4, %4 + ret i64 %5 +} + +define i64 @lrh(i16* %a, i64 %b) { +; RV32XTHEADMEMIDX-LABEL: lrh: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrh a1, a0, a1, 1 +; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lrh: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = getelementptr i16, i16* %a, i64 %b + %2 = load i16, i16* %1, align 4 + %3 = sext i16 %2 to i64 + %4 = add i64 %3, %3 + ret i64 %4 +} + +define i64 @lurh(i16* %a, i32 %b) { +; RV32XTHEADMEMIDX-LABEL: lurh: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrh a1, a0, a1, 1 +; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lurh: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lurh a0, a0, a1, 1 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = zext i32 %b to i64 + %2 = getelementptr i16, i16* %a, i64 %1 + %3 = load i16, i16* %2, align 4 + %4 = sext i16 %3 to i64 + %5 = 
add i64 %4, %4 + ret i64 %5 +} + +define i64 @lrhu(i16* %a, i64 %b) { +; RV32XTHEADMEMIDX-LABEL: lrhu: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrhu a1, a0, a1, 1 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lrhu: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lrhu a0, a0, a1, 1 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = getelementptr i16, i16* %a, i64 %b + %2 = load i16, i16* %1, align 4 + %3 = zext i16 %2 to i64 + %4 = add i64 %3, %3 + ret i64 %4 +} + +define i64 @lurhu(i16* %a, i32 %b) { +; RV32XTHEADMEMIDX-LABEL: lurhu: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrhu a1, a0, a1, 1 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lurhu: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lurhu a0, a0, a1, 1 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = zext i32 %b to i64 + %2 = getelementptr i16, i16* %a, i64 %1 + %3 = load i16, i16* %2, align 4 + %4 = zext i16 %3 to i64 + %5 = add i64 %4, %4 + ret i64 %5 +} + +define i64 @lrw(i32* %a, i64 %b) { +; RV32XTHEADMEMIDX-LABEL: lrw: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 2 +; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lrw: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lrw a0, a0, a1, 2 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = getelementptr i32, i32* %a, i64 %b + %2 = load i32, i32* %1, align 4 + %3 = sext i32 %2 to i64 + %4 = add i64 %3, %3 + ret i64 %4 +} + +define i64 @lurw(i32* %a, i32 %b) { +; 
RV32XTHEADMEMIDX-LABEL: lurw: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 2 +; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lurw: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lurw a0, a0, a1, 2 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = zext i32 %b to i64 + %2 = getelementptr i32, i32* %a, i64 %1 + %3 = load i32, i32* %2, align 4 + %4 = sext i32 %3 to i64 + %5 = add i64 %4, %4 + ret i64 %5 +} + +define i64 @lrwu(i32* %a, i64 %b) { +; RV32XTHEADMEMIDX-LABEL: lrwu: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 2 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lrwu: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lrwu a0, a0, a1, 2 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = getelementptr i32, i32* %a, i64 %b + %2 = load i32, i32* %1, align 4 + %3 = zext i32 %2 to i64 + %4 = add i64 %3, %3 + ret i64 %4 +} + +define i64 @lurwu(i32* %a, i32 %b) { +; RV32XTHEADMEMIDX-LABEL: lurwu: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 2 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lurwu: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lurwu a0, a0, a1, 2 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = zext i32 %b to i64 + %2 = getelementptr i32, i32* %a, i64 %1 + %3 = load i32, i32* %2, align 4 + %4 = zext i32 %3 to i64 + %5 = add i64 %4, %4 + ret i64 %5 +} + +define i64 @lrd(i64* %a, i64 %b) { +; RV32XTHEADMEMIDX-LABEL: lrd: +; RV32XTHEADMEMIDX: # %bb.0: 
+; RV32XTHEADMEMIDX-NEXT: slli a2, a1, 3 +; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 3 +; RV32XTHEADMEMIDX-NEXT: add a0, a0, a2 +; RV32XTHEADMEMIDX-NEXT: lw a2, 4(a0) +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lrd: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lrd a0, a0, a1, 3 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = getelementptr i64, i64* %a, i64 %b + %2 = load i64, i64* %1, align 8 + %3 = add i64 %2, %2 + ret i64 %3 +} + +define i64 @lrd_2(i64* %a, i64 %b) { +; RV32XTHEADMEMIDX-LABEL: lrd_2: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: addi a2, a0, 96 +; RV32XTHEADMEMIDX-NEXT: th.lrw a2, a2, a1, 3 +; RV32XTHEADMEMIDX-NEXT: addi a0, a0, 100 +; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 3 +; RV32XTHEADMEMIDX-NEXT: add a0, a2, a2 +; RV32XTHEADMEMIDX-NEXT: sltu a2, a0, a2 +; RV32XTHEADMEMIDX-NEXT: add a1, a1, a1 +; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lrd_2: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: addi a0, a0, 96 +; RV64XTHEADMEMIDX-NEXT: th.lrd a0, a0, a1, 3 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = add i64 %b, 12 + %2 = getelementptr i64, i64* %a, i64 %1 + %3 = load i64, i64* %2, align 8 + %4 = add i64 %3, %3 + ret i64 %4 +} + +define i64 @lurd(i64* %a, i32 %b) { +; RV32XTHEADMEMIDX-LABEL: lurd: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: slli a2, a1, 3 +; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 3 +; RV32XTHEADMEMIDX-NEXT: add a0, a0, a2 +; RV32XTHEADMEMIDX-NEXT: lw a2, 4(a0) +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1 +; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: 
lurd: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: th.lurd a0, a0, a1, 3 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = zext i32 %b to i64 + %2 = getelementptr i64, i64* %a, i64 %1 + %3 = load i64, i64* %2, align 8 + %4 = add i64 %3, %3 + ret i64 %4 +} + +define void @srb(i8* %a, i64 %b, i8 %c) { +; RV32XTHEADMEMIDX-LABEL: srb: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3 +; RV32XTHEADMEMIDX-NEXT: th.srb a3, a0, a1, 0 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: srb: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV64XTHEADMEMIDX-NEXT: th.srb a2, a0, a1, 0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = add i8 %c, %c + %2 = getelementptr i8, i8* %a, i64 %b + store i8 %1, i8* %2, align 8 + ret void +} + +define void @surb(i8* %a, i32 %b, i8 %c) { +; RV32XTHEADMEMIDX-LABEL: surb: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV32XTHEADMEMIDX-NEXT: th.srb a2, a0, a1, 0 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: surb: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV64XTHEADMEMIDX-NEXT: th.surb a2, a0, a1, 0 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = zext i32 %b to i64 + %2 = add i8 %c, %c + %3 = getelementptr i8, i8* %a, i64 %1 + store i8 %2, i8* %3, align 8 + ret void +} + +define void @srh(i16* %a, i64 %b, i16 %c) { +; RV32XTHEADMEMIDX-LABEL: srh: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3 +; RV32XTHEADMEMIDX-NEXT: th.srh a3, a0, a1, 1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: srh: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV64XTHEADMEMIDX-NEXT: th.srh a2, a0, a1, 1 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = add i16 %c, %c + %2 = getelementptr i16, i16* %a, i64 %b + store i16 %1, i16* %2, align 8 + ret void +} + +define void @surh(i16* %a, i32 %b, i16 %c) { +; RV32XTHEADMEMIDX-LABEL: surh: +; RV32XTHEADMEMIDX: # %bb.0: +; 
RV32XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV32XTHEADMEMIDX-NEXT: th.srh a2, a0, a1, 1 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: surh: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV64XTHEADMEMIDX-NEXT: th.surh a2, a0, a1, 1 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = zext i32 %b to i64 + %2 = add i16 %c, %c + %3 = getelementptr i16, i16* %a, i64 %1 + store i16 %2, i16* %3, align 8 + ret void +} + +define void @srw(i32* %a, i64 %b, i32 %c) { +; RV32XTHEADMEMIDX-LABEL: srw: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3 +; RV32XTHEADMEMIDX-NEXT: th.srw a3, a0, a1, 2 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: srw: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV64XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 2 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = add i32 %c, %c + %2 = getelementptr i32, i32* %a, i64 %b + store i32 %1, i32* %2, align 8 + ret void +} + +define void @surw(i32* %a, i32 %b, i32 %c) { +; RV32XTHEADMEMIDX-LABEL: surw: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV32XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 2 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: surw: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV64XTHEADMEMIDX-NEXT: th.surw a2, a0, a1, 2 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = zext i32 %b to i64 + %2 = add i32 %c, %c + %3 = getelementptr i32, i32* %a, i64 %1 + store i32 %2, i32* %3, align 8 + ret void +} + +define void @srd(i64* %a, i64 %b, i64 %c) { +; RV32XTHEADMEMIDX-LABEL: srd: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a2, a3, a3 +; RV32XTHEADMEMIDX-NEXT: sltu a3, a2, a3 +; RV32XTHEADMEMIDX-NEXT: add a4, a4, a4 +; RV32XTHEADMEMIDX-NEXT: add a3, a4, a3 +; RV32XTHEADMEMIDX-NEXT: slli a4, a1, 3 +; RV32XTHEADMEMIDX-NEXT: add a4, a0, a4 +; RV32XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 3 +; RV32XTHEADMEMIDX-NEXT: sw a3, 4(a4) +; RV32XTHEADMEMIDX-NEXT: ret +; +; 
RV64XTHEADMEMIDX-LABEL: srd: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV64XTHEADMEMIDX-NEXT: th.srd a2, a0, a1, 3 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = add i64 %c, %c + %2 = getelementptr i64, i64* %a, i64 %b + store i64 %1, i64* %2, align 8 + ret void +} + +define void @surd(i64* %a, i32 %b, i64 %c) { +; RV32XTHEADMEMIDX-LABEL: surd: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a4, a2, a2 +; RV32XTHEADMEMIDX-NEXT: sltu a2, a4, a2 +; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3 +; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2 +; RV32XTHEADMEMIDX-NEXT: slli a3, a1, 3 +; RV32XTHEADMEMIDX-NEXT: add a3, a0, a3 +; RV32XTHEADMEMIDX-NEXT: th.srw a4, a0, a1, 3 +; RV32XTHEADMEMIDX-NEXT: sw a2, 4(a3) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: surd: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2 +; RV64XTHEADMEMIDX-NEXT: th.surd a2, a0, a1, 3 +; RV64XTHEADMEMIDX-NEXT: ret + %1 = zext i32 %b to i64 + %2 = add i64 %c, %c + %3 = getelementptr i64, i64* %a, i64 %1 + store i64 %2, i64* %3, align 8 + ret void +} + +define i32* @test_simm5(i32* %base, i32 %a, i32 %b) { +; RV32XTHEADMEMIDX-LABEL: test_simm5: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV32XTHEADMEMIDX-NEXT: th.swia a1, (a0), -12, 2 +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: test_simm5: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2 +; RV64XTHEADMEMIDX-NEXT: th.swia a1, (a0), -12, 2 +; RV64XTHEADMEMIDX-NEXT: ret + %addr.1 = getelementptr i32, i32* %base, i32 -12 + %res = add i32 %a, %b + store i32 %res, i32* %base + ret i32* %addr.1 +} + +define i64 @lrd_large_shift(i64* %a, i64 %b) { +; RV32XTHEADMEMIDX-LABEL: lrd_large_shift: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 5 +; RV32XTHEADMEMIDX-NEXT: add a1, a1, a0 +; RV32XTHEADMEMIDX-NEXT: lw a0, 384(a1) +; RV32XTHEADMEMIDX-NEXT: lw a1, 388(a1) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: 
lrd_large_shift: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: slli a1, a1, 5 +; RV64XTHEADMEMIDX-NEXT: add a0, a1, a0 +; RV64XTHEADMEMIDX-NEXT: ld a0, 384(a0) +; RV64XTHEADMEMIDX-NEXT: ret + %1 = add i64 %b, 12 + %2 = shl i64 %1, 2 + %3 = getelementptr i64, i64* %a, i64 %2 + %4 = load i64, i64* %3, align 8 + ret i64 %4 +} + +define i64 @lrd_large_offset(i64* %a, i64 %b) { +; RV32XTHEADMEMIDX-LABEL: lrd_large_offset: +; RV32XTHEADMEMIDX: # %bb.0: +; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 3 +; RV32XTHEADMEMIDX-NEXT: add a0, a1, a0 +; RV32XTHEADMEMIDX-NEXT: lui a1, 23 +; RV32XTHEADMEMIDX-NEXT: addi a1, a1, 1792 +; RV32XTHEADMEMIDX-NEXT: add a1, a0, a1 +; RV32XTHEADMEMIDX-NEXT: lw a0, 0(a1) +; RV32XTHEADMEMIDX-NEXT: lw a1, 4(a1) +; RV32XTHEADMEMIDX-NEXT: ret +; +; RV64XTHEADMEMIDX-LABEL: lrd_large_offset: +; RV64XTHEADMEMIDX: # %bb.0: +; RV64XTHEADMEMIDX-NEXT: slli a1, a1, 3 +; RV64XTHEADMEMIDX-NEXT: add a0, a1, a0 +; RV64XTHEADMEMIDX-NEXT: lui a1, 23 +; RV64XTHEADMEMIDX-NEXT: add a0, a0, a1 +; RV64XTHEADMEMIDX-NEXT: ld a0, 1792(a0) +; RV64XTHEADMEMIDX-NEXT: ret + %1 = add i64 %b, 12000 + %2 = getelementptr i64, i64* %a, i64 %1 + %3 = load i64, i64* %2, align 8 + ret i64 %3 +} diff --git a/llvm/test/MC/RISCV/rv32xtheadmemidx-invalid.s b/llvm/test/MC/RISCV/rv32xtheadmemidx-invalid.s new file mode 100644 index 0000000..e2c7619 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32xtheadmemidx-invalid.s @@ -0,0 +1,14 @@ +# RUN: not llvm-mc -triple riscv32 -mattr=+xtheadmemidx < %s 2>&1 | FileCheck %s + +th.lwuia a0, (a1), 0, 0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} +th.lwuib a0, (a1), 15, 1 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} +th.ldia a0, (a1), 0, 0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} +th.ldib a0, (a1), 0, 0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction 
Set{{$}} +th.sdia a0, (a1), -16, 0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} +th.sdib a0, (a1), -1, 1 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} +th.lrwu a0, a1, a2, 2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} +th.lrd a0, a1, a2, 0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} +th.srd a0, a1, a2, 3 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} +th.lurwu a0, a1, a2, 2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} +th.lurd a0, a1, a2, 0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} +th.surd a0, a1, a2, 3 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} diff --git a/llvm/test/MC/RISCV/rv32xtheadmemidx-valid.s b/llvm/test/MC/RISCV/rv32xtheadmemidx-valid.s new file mode 100644 index 0000000..15473c4 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32xtheadmemidx-valid.s @@ -0,0 +1,133 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+xtheadmemidx -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+xtheadmemidx < %s \ +# RUN: | llvm-objdump --mattr=+xtheadmemidx -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: th.lwia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x5c] +th.lwia a0, (a1), 0, 2 + +# CHECK-ASM-AND-OBJ: th.lwib +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x4f] +th.lwib a0, (a1), -16, 3 + +# CHECK-ASM-AND-OBJ: th.lhia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x3c] +th.lhia a0, (a1), 0, 2 + +# CHECK-ASM-AND-OBJ: th.lhib +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x2f] +th.lhib a0, (a1), -16, 3 + +# CHECK-ASM-AND-OBJ:
th.lhuia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0xb8] +th.lhuia a0, (a1), 0, 0 + +# CHECK-ASM-AND-OBJ: th.lhuib +# CHECK-ASM: encoding: [0x0b,0xc5,0xf5,0xaa] +th.lhuib a0, (a1), 15, 1 + +# CHECK-ASM-AND-OBJ: th.lbia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x1c] +th.lbia a0, (a1), 0, 2 + +# CHECK-ASM-AND-OBJ: th.lbib +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x0f] +th.lbib a0, (a1), -16, 3 + +# CHECK-ASM-AND-OBJ: th.lbuia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x98] +th.lbuia a0, (a1), 0, 0 + +# CHECK-ASM-AND-OBJ: th.lbuib +# CHECK-ASM: encoding: [0x0b,0xc5,0xf5,0x8a] +th.lbuib a0, (a1), 15, 1 + +# CHECK-ASM-AND-OBJ: th.swia +# CHECK-ASM: encoding: [0x0b,0xd5,0x05,0x5c] +th.swia a0, (a1), 0, 2 + +# CHECK-ASM-AND-OBJ: th.swib +# CHECK-ASM: encoding: [0x0b,0xd5,0x15,0x4e] +th.swib a0, (a1), 1, 3 + +# CHECK-ASM-AND-OBJ: th.shia +# CHECK-ASM: encoding: [0x0b,0xd5,0x45,0x38] +th.shia a0, (a1), 4, 0 + +# CHECK-ASM-AND-OBJ: th.shib +# CHECK-ASM: encoding: [0x0b,0xd5,0xd5,0x2a] +th.shib a0, (a1), 13, 1 + +# CHECK-ASM-AND-OBJ: th.sbia +# CHECK-ASM: encoding: [0x0b,0xd5,0xe5,0x1c] +th.sbia a0, (a1), 14, 2 + +# CHECK-ASM-AND-OBJ: th.sbib +# CHECK-ASM: encoding: [0x0b,0xd5,0xf5,0x0e] +th.sbib a0, (a1), 15, 3 + +# CHECK-ASM-AND-OBJ: th.lrw +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x42] +th.lrw a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.lrh +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x26] +th.lrh a0, a1, a2, 3 + +# CHECK-ASM-AND-OBJ: th.lrhu +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0xa0] +th.lrhu a0, a1, a2, 0 + +# CHECK-ASM-AND-OBJ: th.lrb +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x02] +th.lrb a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.lrbu +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x84] +th.lrbu a0, a1, a2, 2 + +# CHECK-ASM-AND-OBJ: th.srw +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x40] +th.srw a0, a1, a2, 0 + +# CHECK-ASM-AND-OBJ: th.srh +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x22] +th.srh a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.srb +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x04] +th.srb 
a0, a1, a2, 2 + +# CHECK-ASM-AND-OBJ: th.lurw +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x52] +th.lurw a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.lurh +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x36] +th.lurh a0, a1, a2, 3 + +# CHECK-ASM-AND-OBJ: th.lurhu +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0xb0] +th.lurhu a0, a1, a2, 0 + +# CHECK-ASM-AND-OBJ: th.lurb +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x12] +th.lurb a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.lurbu +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x94] +th.lurbu a0, a1, a2, 2 + +# CHECK-ASM-AND-OBJ: th.surw +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x50] +th.surw a0, a1, a2, 0 + +# CHECK-ASM-AND-OBJ: th.surh +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x32] +th.surh a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.surb +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x14] +th.surb a0, a1, a2, 2 diff --git a/llvm/test/MC/RISCV/rv64xtheadmemidx-invalid.s b/llvm/test/MC/RISCV/rv64xtheadmemidx-invalid.s new file mode 100644 index 0000000..fe6d0de --- /dev/null +++ b/llvm/test/MC/RISCV/rv64xtheadmemidx-invalid.s @@ -0,0 +1,13 @@ +# RUN: not llvm-mc -triple riscv32 -mattr=+xtheadmemidx < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -triple riscv64 -mattr=+xtheadmemidx < %s 2>&1 | FileCheck %s + +th.ldia 0(a0), (a1), 0, 0 # CHECK: :[[@LINE]]:23: error: invalid operand for instruction +th.ldib a0, 2(a1), 15, 1 # CHECK: :[[@LINE]]:14: error: invalid operand for instruction +th.lwia a0, (a1), 30, 2 # CHECK: :[[@LINE]]:20: error: immediate must be an integer in the range [-16, 15] +th.lwib a0, (a1), -16, 43 # CHECK: :[[@LINE]]:25: error: immediate must be an integer in the range [0, 3] +th.lhib a0, (a1), -17, 3 # CHECK: :[[@LINE]]:20: error: immediate must be an integer in the range [-16, 15] +th.lrb -2(a0), a1, a2, 0 # CHECK: :[[@LINE]]:9: error: invalid operand for instruction +th.lrw a0, 3(a1), a2, 1 # CHECK: :[[@LINE]]:13: error: invalid operand for instruction +th.lrw a0, a1, 4(a2), 2 # CHECK: :[[@LINE]]:17: error: invalid operand for instruction 
+th.lrh a0, a1, a2, 5 # CHECK: :[[@LINE]]:21: error: immediate must be an integer in the range [0, 3] +th.lrhu a0, a1, a2, -1 # CHECK: :[[@LINE]]:22: error: immediate must be an integer in the range [0, 3] diff --git a/llvm/test/MC/RISCV/rv64xtheadmemidx-valid.s b/llvm/test/MC/RISCV/rv64xtheadmemidx-valid.s new file mode 100644 index 0000000..24782d1 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64xtheadmemidx-valid.s @@ -0,0 +1,181 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+xtheadmemidx -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+xtheadmemidx < %s \ +# RUN: | llvm-objdump --mattr=+xtheadmemidx -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: th.ldia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x78] +th.ldia a0, (a1), 0, 0 + +# CHECK-ASM-AND-OBJ: th.ldib +# CHECK-ASM: encoding: [0x0b,0xc5,0xf5,0x6a] +th.ldib a0, (a1), 15, 1 + +# CHECK-ASM-AND-OBJ: th.lwia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x5c] +th.lwia a0, (a1), 0, 2 + +# CHECK-ASM-AND-OBJ: th.lwib +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x4f] +th.lwib a0, (a1), -16, 3 + +# CHECK-ASM-AND-OBJ: th.lwuia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0xd8] +th.lwuia a0, (a1), 0, 0 + +# CHECK-ASM-AND-OBJ: th.lwuib +# CHECK-ASM: encoding: [0x0b,0xc5,0xf5,0xca] +th.lwuib a0, (a1), 15, 1 + +# CHECK-ASM-AND-OBJ: th.lhia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x3c] +th.lhia a0, (a1), 0, 2 + +# CHECK-ASM-AND-OBJ: th.lhib +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x2f] +th.lhib a0, (a1), -16, 3 + +# CHECK-ASM-AND-OBJ: th.lhuia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0xb8] +th.lhuia a0, (a1), 0, 0 + +# CHECK-ASM-AND-OBJ: th.lhuib +# CHECK-ASM: encoding: [0x0b,0xc5,0xf5,0xaa] +th.lhuib a0, (a1), 15, 1 + +# CHECK-ASM-AND-OBJ: th.lbia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x1c] +th.lbia a0, (a1), 0, 2 + +# CHECK-ASM-AND-OBJ: th.lbib +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x0f] +th.lbib a0, (a1), -16, 
3 + +# CHECK-ASM-AND-OBJ: th.lbuia +# CHECK-ASM: encoding: [0x0b,0xc5,0x05,0x98] +th.lbuia a0, (a1), 0, 0 + +# CHECK-ASM-AND-OBJ: th.lbuib +# CHECK-ASM: encoding: [0x0b,0xc5,0xf5,0x8a] +th.lbuib a0, (a1), 15, 1 + +# CHECK-ASM-AND-OBJ: th.sdia +# CHECK-ASM: encoding: [0x0b,0xd5,0x05,0x79] +th.sdia a0, (a1), -16, 0 + +# CHECK-ASM-AND-OBJ: th.sdib +# CHECK-ASM: encoding: [0x0b,0xd5,0xf5,0x6b] +th.sdib a0, (a1), -1, 1 + +# CHECK-ASM-AND-OBJ: th.swia +# CHECK-ASM: encoding: [0x0b,0xd5,0x05,0x5c] +th.swia a0, (a1), 0, 2 + +# CHECK-ASM-AND-OBJ: th.swib +# CHECK-ASM: encoding: [0x0b,0xd5,0x15,0x4e] +th.swib a0, (a1), 1, 3 + +# CHECK-ASM-AND-OBJ: th.shia +# CHECK-ASM: encoding: [0x0b,0xd5,0x45,0x38] +th.shia a0, (a1), 4, 0 + +# CHECK-ASM-AND-OBJ: th.shib +# CHECK-ASM: encoding: [0x0b,0xd5,0xd5,0x2a] +th.shib a0, (a1), 13, 1 + +# CHECK-ASM-AND-OBJ: th.sbia +# CHECK-ASM: encoding: [0x0b,0xd5,0xe5,0x1c] +th.sbia a0, (a1), 14, 2 + +# CHECK-ASM-AND-OBJ: th.sbib +# CHECK-ASM: encoding: [0x0b,0xd5,0xf5,0x0e] +th.sbib a0, (a1), 15, 3 + +# CHECK-ASM-AND-OBJ: th.lrd +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x60] +th.lrd a0, a1, a2, 0 + +# CHECK-ASM-AND-OBJ: th.lrw +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x42] +th.lrw a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.lrwu +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0xc4] +th.lrwu a0, a1, a2, 2 + +# CHECK-ASM-AND-OBJ: th.lrh +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x26] +th.lrh a0, a1, a2, 3 + +# CHECK-ASM-AND-OBJ: th.lrhu +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0xa0] +th.lrhu a0, a1, a2, 0 + +# CHECK-ASM-AND-OBJ: th.lrb +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x02] +th.lrb a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.lrbu +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x84] +th.lrbu a0, a1, a2, 2 + +# CHECK-ASM-AND-OBJ: th.srd +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x66] +th.srd a0, a1, a2, 3 + +# CHECK-ASM-AND-OBJ: th.srw +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x40] +th.srw a0, a1, a2, 0 + +# CHECK-ASM-AND-OBJ: th.srh +# CHECK-ASM: encoding: 
[0x0b,0xd5,0xc5,0x22] +th.srh a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.srb +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x04] +th.srb a0, a1, a2, 2 + +# CHECK-ASM-AND-OBJ: th.lurd +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x70] +th.lurd a0, a1, a2, 0 + +# CHECK-ASM-AND-OBJ: th.lurw +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x52] +th.lurw a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.lurwu +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0xd4] +th.lurwu a0, a1, a2, 2 + +# CHECK-ASM-AND-OBJ: th.lurh +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x36] +th.lurh a0, a1, a2, 3 + +# CHECK-ASM-AND-OBJ: th.lurhu +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0xb0] +th.lurhu a0, a1, a2, 0 + +# CHECK-ASM-AND-OBJ: th.lurb +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x12] +th.lurb a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.lurbu +# CHECK-ASM: encoding: [0x0b,0xc5,0xc5,0x94] +th.lurbu a0, a1, a2, 2 + +# CHECK-ASM-AND-OBJ: th.surd +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x76] +th.surd a0, a1, a2, 3 + +# CHECK-ASM-AND-OBJ: th.surw +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x50] +th.surw a0, a1, a2, 0 + +# CHECK-ASM-AND-OBJ: th.surh +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x32] +th.surh a0, a1, a2, 1 + +# CHECK-ASM-AND-OBJ: th.surb +# CHECK-ASM: encoding: [0x0b,0xd5,0xc5,0x14] +th.surb a0, a1, a2, 2 -- 2.7.4