include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
+include "SMEInstrFormats.td"
//===----------------------------------------------------------------------===//
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
-
+include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"
def qsub1 : SubRegIndex<128>;
def qsub2 : SubRegIndex<128>;
def qsub3 : SubRegIndex<128>;
+ // Note: Code depends on these having consecutive numbers
+ def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits
+ def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits
+ def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits
+ def zasubs0 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits
+ def zasubs1 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits
+ def zasubd0 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits
+ def zasubd1 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits
+ def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits
+ def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits
}
let Namespace = "AArch64" in {
def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">;
def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>;
}
+
+// Accumulator array tiles.
+def ZAQ0 : AArch64Reg<0, "za0.q">;
+def ZAQ1 : AArch64Reg<1, "za1.q">;
+def ZAQ2 : AArch64Reg<2, "za2.q">;
+def ZAQ3 : AArch64Reg<3, "za3.q">;
+def ZAQ4 : AArch64Reg<4, "za4.q">;
+def ZAQ5 : AArch64Reg<5, "za5.q">;
+def ZAQ6 : AArch64Reg<6, "za6.q">;
+def ZAQ7 : AArch64Reg<7, "za7.q">;
+def ZAQ8 : AArch64Reg<8, "za8.q">;
+def ZAQ9 : AArch64Reg<9, "za9.q">;
+def ZAQ10 : AArch64Reg<10, "za10.q">;
+def ZAQ11 : AArch64Reg<11, "za11.q">;
+def ZAQ12 : AArch64Reg<12, "za12.q">;
+def ZAQ13 : AArch64Reg<13, "za13.q">;
+def ZAQ14 : AArch64Reg<14, "za14.q">;
+def ZAQ15 : AArch64Reg<15, "za15.q">;
+
+let SubRegIndices = [zasubq0, zasubq1] in {
+ def ZAD0 : AArch64Reg<0, "za0.d", [ZAQ0, ZAQ8]>;
+ def ZAD1 : AArch64Reg<1, "za1.d", [ZAQ1, ZAQ9]>;
+ def ZAD2 : AArch64Reg<2, "za2.d", [ZAQ2, ZAQ10]>;
+ def ZAD3 : AArch64Reg<3, "za3.d", [ZAQ3, ZAQ11]>;
+ def ZAD4 : AArch64Reg<4, "za4.d", [ZAQ4, ZAQ12]>;
+ def ZAD5 : AArch64Reg<5, "za5.d", [ZAQ5, ZAQ13]>;
+ def ZAD6 : AArch64Reg<6, "za6.d", [ZAQ6, ZAQ14]>;
+ def ZAD7 : AArch64Reg<7, "za7.d", [ZAQ7, ZAQ15]>;
+}
+
+let SubRegIndices = [zasubd0, zasubd1] in {
+ def ZAS0 : AArch64Reg<0, "za0.s", [ZAD0, ZAD4]>;
+ def ZAS1 : AArch64Reg<1, "za1.s", [ZAD1, ZAD5]>;
+ def ZAS2 : AArch64Reg<2, "za2.s", [ZAD2, ZAD6]>;
+ def ZAS3 : AArch64Reg<3, "za3.s", [ZAD3, ZAD7]>;
+}
+
+let SubRegIndices = [zasubs0, zasubs1] in {
+ def ZAH0 : AArch64Reg<0, "za0.h", [ZAS0, ZAS2]>;
+ def ZAH1 : AArch64Reg<1, "za1.h", [ZAS1, ZAS3]>;
+}
+
+let SubRegIndices = [zasubh0, zasubh1] in {
+ def ZAB0 : AArch64Reg<0, "za0.b", [ZAH0, ZAH1]>;
+}
+
+let SubRegIndices = [zasubb] in {
+ def ZA : AArch64Reg<0, "za", [ZAB0]>;
+}
+
+// SME Register Classes
+
+// Accumulator array
+def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> {
+ let Size = 2048;
+}
+
+// Accumulator array as single tiles
+def MPR8 : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> {
+ let Size = 2048;
+}
+def MPR16 : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> {
+ let Size = 1024;
+}
+def MPR32 : RegisterClass<"AArch64", [untyped], 512, (add (sequence "ZAS%u", 0, 3))> {
+ let Size = 512;
+}
+def MPR64 : RegisterClass<"AArch64", [untyped], 256, (add (sequence "ZAD%u", 0, 7))> {
+ let Size = 256;
+}
+def MPR128 : RegisterClass<"AArch64", [untyped], 128, (add (sequence "ZAQ%u", 0, 15))> {
+ let Size = 128;
+}
+
+// SME Register Operands
+// There are three types of SME matrix register operands:
+// * Tiles:
+//
+// These tiles make up the larger accumulator matrix. The tile representation
+// has an element type suffix, e.g. za0.b or za15.q and can be any of the
+// registers:
+// ZAQ0..ZAQ15
+// ZAD0..ZAD7
+// ZAS0..ZAS3
+// ZAH0..ZAH1
+// or ZAB0
+//
+// * Tile vectors:
+//
+// Their representation is similar to regular tiles, but they have an extra
+// 'h' or 'v' to tell how the vector at [reg+offset] is laid out in the tile,
+// horizontally or vertically.
+//
+// e.g. za1h.h or za15v.q, which corresponds to vectors in registers ZAH1 and
+// ZAQ15, respectively. The horizontal/vertical is more a property of the
+// instruction, than a property of the asm-operand itself, or its register.
+// The distinction is required for the parsing/printing of the operand,
+// as from a compiler's perspective, the whole tile is read/written.
+//
+// * Accumulator matrix:
+//
+// This is the entire matrix accumulator register ZA (<=> ZAB0), printed as
+// 'za'.
+
+//
+// Tiles
+//
+
+// Assembler operand class for a whole-tile matrix operand such as za0.s.
+//   RC      - name of the register class the parsed register must belong to.
+//   EltSize - element size that the parsed register suffix must denote.
+class MatrixTileAsmOperand<string RC, int EltSize> : AsmOperandClass {
+ let Name = "MatrixTile" # EltSize;
+ let DiagnosticType = "Invalid" # Name;
+ let ParserMethod = "tryParseMatrixRegister";
+ let RenderMethod = "addMatrixOperands";
+ // Builds "isMatrixRegOperand<MatrixKind::Tile, EltSize, AArch64::<RC>RegClassID>".
+ let PredicateMethod = "isMatrixRegOperand<"
+ # "MatrixKind::Tile" # ", "
+ # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+// Register operand for a matrix tile drawn from register class RC.
+// NumBitsForTile is forwarded as the template argument of the
+// DecodeMatrixTile decoder method; EltSize selects the asm operand class.
+class MatrixTileOperand<int EltSize, int NumBitsForTile, RegisterClass RC>
+ : RegisterOperand<RC> {
+ let ParserMatchClass = MatrixTileAsmOperand<!cast<string>(RC), EltSize>;
+ let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+ let PrintMethod = "printMatrixTile";
+}
+
+def TileOp32 : MatrixTileOperand<32, 2, MPR32>;
+def TileOp64 : MatrixTileOperand<64, 3, MPR64>;
+
+//
+// Tile vectors (horizontal and vertical)
+//
+
+// Assembler operand class for a tile vector operand such as za1h.h or za15v.q.
+//   RC         - name of the register class the parsed register must belong to.
+//   EltSize    - element size that the parsed register suffix must denote.
+//   IsVertical - non-zero selects the "V" name and MatrixKind::Col, zero
+//                selects the "H" name and MatrixKind::Row.
+class MatrixTileVectorAsmOperand<string RC, int EltSize, int IsVertical>
+ : AsmOperandClass {
+ let Name = "MatrixTileVector" # !if(IsVertical, "V", "H") # EltSize;
+ let DiagnosticType = "Invalid" # Name;
+ let ParserMethod = "tryParseMatrixRegister";
+ let RenderMethod = "addMatrixOperands";
+ // Builds "isMatrixRegOperand<MatrixKind::{Col|Row}, EltSize, AArch64::<RC>RegClassID>".
+ let PredicateMethod = "isMatrixRegOperand<"
+ # "MatrixKind::"
+ # !if(IsVertical, "Col", "Row") # ", "
+ # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+// Register operand for a horizontal or vertical tile vector drawn from
+// register class RC. NumBitsForTile is forwarded to the DecodeMatrixTile
+// decoder method and IsVertical to the printMatrixTileVector print method.
+class MatrixTileVectorOperand<int EltSize, int NumBitsForTile,
+ RegisterClass RC, int IsVertical>
+ : RegisterOperand<RC> {
+ let ParserMatchClass = MatrixTileVectorAsmOperand<!cast<string>(RC), EltSize,
+ IsVertical>;
+ let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+ let PrintMethod = "printMatrixTileVector<" # IsVertical # ">";
+}
+
+def TileVectorOpH8 : MatrixTileVectorOperand< 8, 0, MPR8, 0>;
+def TileVectorOpH16 : MatrixTileVectorOperand< 16, 1, MPR16, 0>;
+def TileVectorOpH32 : MatrixTileVectorOperand< 32, 2, MPR32, 0>;
+def TileVectorOpH64 : MatrixTileVectorOperand< 64, 3, MPR64, 0>;
+def TileVectorOpH128 : MatrixTileVectorOperand<128, 4, MPR128, 0>;
+
+def TileVectorOpV8 : MatrixTileVectorOperand< 8, 0, MPR8, 1>;
+def TileVectorOpV16 : MatrixTileVectorOperand< 16, 1, MPR16, 1>;
+def TileVectorOpV32 : MatrixTileVectorOperand< 32, 2, MPR32, 1>;
+def TileVectorOpV64 : MatrixTileVectorOperand< 64, 3, MPR64, 1>;
+def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>;
+
+//
+// Accumulator matrix
+//
+
+// Assembler operand class for the accumulator matrix operand.
+//   RC      - name of the register class the parsed register must belong to.
+//   EltSize - element size the parsed operand must report.
+// Note that Name is the fixed string "Matrix"; it does not depend on EltSize.
+class MatrixAsmOperand<string RC, int EltSize> : AsmOperandClass {
+ let Name = "Matrix";
+ let DiagnosticType = "Invalid" # Name;
+ let ParserMethod = "tryParseMatrixRegister";
+ let RenderMethod = "addMatrixOperands";
+ // Builds "isMatrixRegOperand<MatrixKind::Array, EltSize, AArch64::<RC>RegClassID>".
+ let PredicateMethod = "isMatrixRegOperand<"
+ # "MatrixKind::Array" # ", "
+ # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+// Register operand for the accumulator matrix drawn from register class RC.
+// EltSize is forwarded to both the asm operand class and the printMatrix
+// print method. No decoder method is set here.
+class MatrixOperand<RegisterClass RC, int EltSize> : RegisterOperand<RC> {
+ let ParserMatchClass = MatrixAsmOperand<!cast<string>(RC), EltSize>;
+ let PrintMethod = "printMatrix<" # EltSize # ">";
+}
+
+def MatrixOp : MatrixOperand<MPR, 0>;
--- /dev/null
+//=- AArch64SMEInstrInfo.td - AArch64 SME Instructions -*- tablegen -*-----=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Add vector elements horizontally or vertically to ZA tile.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasSME] in {
+def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">;
+def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">;
+}
+
+let Predicates = [HasSMEI64] in {
+def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">;
+def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">;
+}
Scalar,
NeonVector,
SVEDataVector,
- SVEPredicateVector
+ SVEPredicateVector,
+ Matrix
};
+// How a parsed matrix register operand was written: Array for the whole "za"
+// register, Row / Col for a name carrying an 'h' / 'v' marker before the
+// suffix (e.g. za0h.s / za0v.s), and Tile for any other tile name (e.g. za0.s).
+enum class MatrixKind { Array, Tile, Row, Col };
+
enum RegConstraintEqualityTy {
EqualsReg,
EqualsSuperReg,
OperandMatchResultTy tryParseScalarRegister(unsigned &Reg);
OperandMatchResultTy tryParseVectorRegister(unsigned &Reg, StringRef &Kind,
RegKind MatchKind);
+ OperandMatchResultTy tryParseMatrixRegister(OperandVector &Operands);
OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands);
OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands);
k_ShiftedImm,
k_CondCode,
k_Register,
+ k_MatrixRegister,
k_VectorList,
k_VectorIndex,
k_Token,
ShiftExtendOp ShiftExtend;
};
+ // Payload stored in the operand union for k_MatrixRegister operands.
+ struct MatrixRegOp {
+ // Register number as produced by the register-name matcher.
+ unsigned RegNum;
+ // Element width taken from the register suffix; 0 for plain "za".
+ unsigned ElementWidth;
+ // Whether the operand was written as array, tile, row or column.
+ MatrixKind Kind;
+ };
+
struct VectorListOp {
unsigned RegNum;
unsigned Count;
union {
struct TokOp Tok;
struct RegOp Reg;
+ struct MatrixRegOp MatrixReg;
struct VectorListOp VectorList;
struct VectorIndexOp VectorIndex;
struct ImmOp Imm;
case k_Register:
Reg = o.Reg;
break;
+ case k_MatrixRegister:
+ MatrixReg = o.MatrixReg;
+ break;
case k_VectorList:
VectorList = o.VectorList;
break;
return Reg.RegNum;
}
+ /// Return the register number of a matrix register operand.
+ /// Asserts that this operand is a k_MatrixRegister.
+ unsigned getMatrixReg() const {
+ assert(Kind == k_MatrixRegister && "Invalid access!");
+ return MatrixReg.RegNum;
+ }
+
+ /// Return the element width recorded for a matrix register operand.
+ /// Asserts that this operand is a k_MatrixRegister.
+ unsigned getMatrixElementWidth() const {
+ assert(Kind == k_MatrixRegister && "Invalid access!");
+ return MatrixReg.ElementWidth;
+ }
+
+ /// Return the MatrixKind recorded for a matrix register operand.
+ /// Asserts that this operand is a k_MatrixRegister.
+ MatrixKind getMatrixKind() const {
+ assert(Kind == k_MatrixRegister && "Invalid access!");
+ return MatrixReg.Kind;
+ }
+
RegConstraintEqualityTy getRegEqualityTy() const {
assert(Kind == k_Register && "Invalid access!");
return Reg.EqualityTy;
Reg.RegNum));
}
+ /// True when this operand holds a matrix register (k_MatrixRegister).
+ bool isMatrix() const { return Kind == k_MatrixRegister; }
+
template <unsigned Class> bool isSVEVectorReg() const {
RegKind RK;
switch (Class) {
return true;
}
+  /// Match only a matrix register operand of the requested \p Kind whose
+  /// register belongs to register class \p RegClass and whose element width
+  /// equals \p EltSize; anything else is reported as NoMatch.
+  template <MatrixKind Kind, unsigned EltSize, unsigned RegClass>
+  DiagnosticPredicate isMatrixRegOperand() const {
+    // The accessors assert on non-matrix operands, so test isMatrix() first.
+    if (!isMatrix() || getMatrixKind() != Kind)
+      return DiagnosticPredicateTy::NoMatch;
+
+    const bool InRegClass =
+        AArch64MCRegisterClasses[RegClass].contains(getMatrixReg());
+    if (!InRegClass || EltSize != getMatrixElementWidth())
+      return DiagnosticPredicateTy::NoMatch;
+
+    return DiagnosticPredicateTy::Match;
+  }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
if (!Expr)
Inst.addOperand(MCOperand::createReg(getReg()));
}
+ /// Append this operand's matrix register to \p Inst as a register operand.
+ /// \p N must be 1.
+ void addMatrixOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getMatrixReg()));
+ }
+
void addGPR32as64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
assert(
}
static std::unique_ptr<AArch64Operand>
+  CreateMatrixRegister(unsigned RegNum, unsigned ElementWidth, MatrixKind Kind,
+                       SMLoc S, SMLoc E, MCContext &Ctx) {
+    // Build a k_MatrixRegister operand spanning [S, E] and fill in its
+    // matrix-register payload.
+    std::unique_ptr<AArch64Operand> MatrixOperand =
+        std::make_unique<AArch64Operand>(k_MatrixRegister, Ctx);
+    MatrixOperand->StartLoc = S;
+    MatrixOperand->EndLoc = E;
+    MatrixOperand->MatrixReg.Kind = Kind;
+    MatrixOperand->MatrixReg.ElementWidth = ElementWidth;
+    MatrixOperand->MatrixReg.RegNum = RegNum;
+    return MatrixOperand;
+  }
+
+ static std::unique_ptr<AArch64Operand>
CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
auto Op = std::make_unique<AArch64Operand>(k_ShiftExtend, Ctx);
case k_BTIHint:
OS << getBTIHintName();
break;
+ case k_MatrixRegister:
+ OS << "<matrix " << getMatrixReg() << ">";
+ break;
case k_Register:
OS << "<register " << getReg() << ">";
if (!getShiftExtendAmount() && !hasShiftExtendAmount())
break;
case RegKind::SVEPredicateVector:
case RegKind::SVEDataVector:
+ case RegKind::Matrix:
Res = StringSwitch<std::pair<int, int>>(Suffix.lower())
.Case("", {0, 0})
.Case(".b", {0, 8})
.Default(0);
}
+/// Map a matrix register name to its register number, or return 0 when
+/// \p Name is not a known matrix register. The comparison is done on the
+/// lower-cased name. The 'h' and 'v' spellings of a tile name map to the same
+/// register as the plain tile spelling.
+static unsigned matchMatrixRegName(StringRef Name) {
+ return StringSwitch<unsigned>(Name.lower())
+ // Whole array and plain tile names.
+ .Case("za", AArch64::ZA)
+ .Case("za0.q", AArch64::ZAQ0)
+ .Case("za1.q", AArch64::ZAQ1)
+ .Case("za2.q", AArch64::ZAQ2)
+ .Case("za3.q", AArch64::ZAQ3)
+ .Case("za4.q", AArch64::ZAQ4)
+ .Case("za5.q", AArch64::ZAQ5)
+ .Case("za6.q", AArch64::ZAQ6)
+ .Case("za7.q", AArch64::ZAQ7)
+ .Case("za8.q", AArch64::ZAQ8)
+ .Case("za9.q", AArch64::ZAQ9)
+ .Case("za10.q", AArch64::ZAQ10)
+ .Case("za11.q", AArch64::ZAQ11)
+ .Case("za12.q", AArch64::ZAQ12)
+ .Case("za13.q", AArch64::ZAQ13)
+ .Case("za14.q", AArch64::ZAQ14)
+ .Case("za15.q", AArch64::ZAQ15)
+ .Case("za0.d", AArch64::ZAD0)
+ .Case("za1.d", AArch64::ZAD1)
+ .Case("za2.d", AArch64::ZAD2)
+ .Case("za3.d", AArch64::ZAD3)
+ .Case("za4.d", AArch64::ZAD4)
+ .Case("za5.d", AArch64::ZAD5)
+ .Case("za6.d", AArch64::ZAD6)
+ .Case("za7.d", AArch64::ZAD7)
+ .Case("za0.s", AArch64::ZAS0)
+ .Case("za1.s", AArch64::ZAS1)
+ .Case("za2.s", AArch64::ZAS2)
+ .Case("za3.s", AArch64::ZAS3)
+ .Case("za0.h", AArch64::ZAH0)
+ .Case("za1.h", AArch64::ZAH1)
+ .Case("za0.b", AArch64::ZAB0)
+ // Tile names carrying the 'h' marker.
+ .Case("za0h.q", AArch64::ZAQ0)
+ .Case("za1h.q", AArch64::ZAQ1)
+ .Case("za2h.q", AArch64::ZAQ2)
+ .Case("za3h.q", AArch64::ZAQ3)
+ .Case("za4h.q", AArch64::ZAQ4)
+ .Case("za5h.q", AArch64::ZAQ5)
+ .Case("za6h.q", AArch64::ZAQ6)
+ .Case("za7h.q", AArch64::ZAQ7)
+ .Case("za8h.q", AArch64::ZAQ8)
+ .Case("za9h.q", AArch64::ZAQ9)
+ .Case("za10h.q", AArch64::ZAQ10)
+ .Case("za11h.q", AArch64::ZAQ11)
+ .Case("za12h.q", AArch64::ZAQ12)
+ .Case("za13h.q", AArch64::ZAQ13)
+ .Case("za14h.q", AArch64::ZAQ14)
+ .Case("za15h.q", AArch64::ZAQ15)
+ .Case("za0h.d", AArch64::ZAD0)
+ .Case("za1h.d", AArch64::ZAD1)
+ .Case("za2h.d", AArch64::ZAD2)
+ .Case("za3h.d", AArch64::ZAD3)
+ .Case("za4h.d", AArch64::ZAD4)
+ .Case("za5h.d", AArch64::ZAD5)
+ .Case("za6h.d", AArch64::ZAD6)
+ .Case("za7h.d", AArch64::ZAD7)
+ .Case("za0h.s", AArch64::ZAS0)
+ .Case("za1h.s", AArch64::ZAS1)
+ .Case("za2h.s", AArch64::ZAS2)
+ .Case("za3h.s", AArch64::ZAS3)
+ .Case("za0h.h", AArch64::ZAH0)
+ .Case("za1h.h", AArch64::ZAH1)
+ .Case("za0h.b", AArch64::ZAB0)
+ // Tile names carrying the 'v' marker.
+ .Case("za0v.q", AArch64::ZAQ0)
+ .Case("za1v.q", AArch64::ZAQ1)
+ .Case("za2v.q", AArch64::ZAQ2)
+ .Case("za3v.q", AArch64::ZAQ3)
+ .Case("za4v.q", AArch64::ZAQ4)
+ .Case("za5v.q", AArch64::ZAQ5)
+ .Case("za6v.q", AArch64::ZAQ6)
+ .Case("za7v.q", AArch64::ZAQ7)
+ .Case("za8v.q", AArch64::ZAQ8)
+ .Case("za9v.q", AArch64::ZAQ9)
+ .Case("za10v.q", AArch64::ZAQ10)
+ .Case("za11v.q", AArch64::ZAQ11)
+ .Case("za12v.q", AArch64::ZAQ12)
+ .Case("za13v.q", AArch64::ZAQ13)
+ .Case("za14v.q", AArch64::ZAQ14)
+ .Case("za15v.q", AArch64::ZAQ15)
+ .Case("za0v.d", AArch64::ZAD0)
+ .Case("za1v.d", AArch64::ZAD1)
+ .Case("za2v.d", AArch64::ZAD2)
+ .Case("za3v.d", AArch64::ZAD3)
+ .Case("za4v.d", AArch64::ZAD4)
+ .Case("za5v.d", AArch64::ZAD5)
+ .Case("za6v.d", AArch64::ZAD6)
+ .Case("za7v.d", AArch64::ZAD7)
+ .Case("za0v.s", AArch64::ZAS0)
+ .Case("za1v.s", AArch64::ZAS1)
+ .Case("za2v.s", AArch64::ZAS2)
+ .Case("za3v.s", AArch64::ZAS3)
+ .Case("za0v.h", AArch64::ZAH0)
+ .Case("za1v.h", AArch64::ZAH1)
+ .Case("za0v.b", AArch64::ZAB0)
+ .Default(0);
+}
+
bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
if ((RegNum = MatchNeonVectorRegName(Name)))
return Kind == RegKind::NeonVector ? RegNum : 0;
+ if ((RegNum = matchMatrixRegName(Name)))
+ return Kind == RegKind::Matrix ? RegNum : 0;
+
// The parsed register must be of RegKind Scalar
if ((RegNum = MatchRegisterName(Name)))
return Kind == RegKind::Scalar ? RegNum : 0;
return false;
}
+/// Try to parse the current token as a matrix register operand: the whole
+/// array ("za"), a tile (e.g. "za0.s") or a tile vector (e.g. "za0h.s",
+/// "za0v.s").
+///
+/// On success the token is consumed, a matrix register operand is appended to
+/// \p Operands and MatchOperand_Success is returned. MatchOperand_NoMatch is
+/// returned, without consuming anything, when the token does not name a
+/// matrix register. MatchOperand_ParseFail is returned after emitting an
+/// error when the element width suffix cannot be parsed.
+OperandMatchResultTy
+AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
+  MCAsmParser &Parser = getParser();
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc S = getLoc();
+
+  StringRef Name = Tok.getString();
+
+  // The whole array has no element suffix, so handle it before looking for
+  // the '.' separator below.
+  if (Name.equals_insensitive("za")) {
+    Parser.Lex(); // eat "za"
+    Operands.push_back(AArch64Operand::CreateMatrixRegister(
+        AArch64::ZA, /*ElementWidth=*/0, MatrixKind::Array, S, getLoc(),
+        getContext()));
+    return MatchOperand_Success;
+  }
+
+  // Try to parse matrix register.
+  unsigned Reg = matchRegisterNameAlias(Name, RegKind::Matrix);
+  if (!Reg)
+    return MatchOperand_NoMatch;
+
+  size_t DotPosition = Name.find('.');
+  assert(DotPosition != StringRef::npos && "Unexpected register");
+
+  StringRef Head = Name.take_front(DotPosition);
+  StringRef Tail = Name.drop_front(DotPosition);
+  StringRef RowOrColumn = Head.take_back();
+
+  // Register names are matched case-insensitively, so the row/column marker
+  // has to be compared case-insensitively as well. Otherwise an upper-case
+  // spelling such as "ZA0H.S" would be classified as a plain tile.
+  MatrixKind Kind = StringSwitch<MatrixKind>(RowOrColumn.lower())
+                        .Case("h", MatrixKind::Row)
+                        .Case("v", MatrixKind::Col)
+                        .Default(MatrixKind::Tile);
+
+  // Next up, parsing the suffix
+  const auto &KindRes = parseVectorKind(Tail, RegKind::Matrix);
+  if (!KindRes) {
+    TokError("Expected the register to be followed by element width suffix");
+    return MatchOperand_ParseFail;
+  }
+  unsigned ElementWidth = KindRes->second;
+
+  Parser.Lex();
+
+  Operands.push_back(AArch64Operand::CreateMatrixRegister(
+      Reg, ElementWidth, Kind, S, getLoc(), getContext()));
+  return MatchOperand_Success;
+}
+
/// tryParseOptionalShift - Some operands take an optional shift argument. Parse
/// them if present.
OperandMatchResultTy
return Error(Loc, "Invalid floating point constant, expected 0.5 or 2.0.");
case Match_InvalidSVEExactFPImmOperandZeroOne:
return Error(Loc, "Invalid floating point constant, expected 0.0 or 1.0.");
+ case Match_InvalidMatrixTileVectorH8:
+ return Error(Loc, "invalid matrix operand, expected za0h.b");
+ case Match_InvalidMatrixTileVectorH16:
+ return Error(Loc, "invalid matrix operand, expected za[0-1]h.h");
+ case Match_InvalidMatrixTileVectorH32:
+ return Error(Loc, "invalid matrix operand, expected za[0-3]h.s");
+ case Match_InvalidMatrixTileVectorH64:
+ return Error(Loc, "invalid matrix operand, expected za[0-7]h.d");
+ case Match_InvalidMatrixTileVectorH128:
+ return Error(Loc, "invalid matrix operand, expected za[0-15]h.q");
+ case Match_InvalidMatrixTileVectorV8:
+ return Error(Loc, "invalid matrix operand, expected za0v.b");
+ case Match_InvalidMatrixTileVectorV16:
+ return Error(Loc, "invalid matrix operand, expected za[0-1]v.h");
+ case Match_InvalidMatrixTileVectorV32:
+ return Error(Loc, "invalid matrix operand, expected za[0-3]v.s");
+ case Match_InvalidMatrixTileVectorV64:
+ return Error(Loc, "invalid matrix operand, expected za[0-7]v.d");
+ case Match_InvalidMatrixTileVectorV128:
+ return Error(Loc, "invalid matrix operand, expected za[0-15]v.q");
+ case Match_InvalidMatrixTile32:
+ return Error(Loc, "invalid matrix operand, expected za[0-3].s");
+ case Match_InvalidMatrixTile64:
+ return Error(Loc, "invalid matrix operand, expected za[0-7].d");
+ case Match_InvalidMatrix:
+ return Error(Loc, "invalid matrix operand, expected za");
default:
llvm_unreachable("unexpected error code!");
}
case Match_InvalidSVEExactFPImmOperandHalfOne:
case Match_InvalidSVEExactFPImmOperandHalfTwo:
case Match_InvalidSVEExactFPImmOperandZeroOne:
+ case Match_InvalidMatrixTile32:
+ case Match_InvalidMatrixTile64:
+ case Match_InvalidMatrix:
+ case Match_InvalidMatrixTileVectorH8:
+ case Match_InvalidMatrixTileVectorH16:
+ case Match_InvalidMatrixTileVectorH32:
+ case Match_InvalidMatrixTileVectorH64:
+ case Match_InvalidMatrixTileVectorH128:
+ case Match_InvalidMatrixTileVectorV8:
+ case Match_InvalidMatrixTileVectorV16:
+ case Match_InvalidMatrixTileVectorV32:
+ case Match_InvalidMatrixTileVectorV64:
+ case Match_InvalidMatrixTileVectorV128:
case Match_MSR:
case Match_MRS: {
if (ErrorInfo >= Operands.size())
static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+template <unsigned NumBitsForTile>
+static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
return Success;
}
+// Tile registers indexed by [NumBitsForTile][tile number]. Row N holds 2^N
+// registers; the remaining slots of each row are zero-initialised padding
+// that is never read, because DecodeMatrixTile rejects tile numbers above
+// 2^N - 1 before indexing. A plain constant array is used so that no static
+// constructor has to run at start-up.
+static const unsigned MatrixZATileDecoderTable[5][16] = {
+    {AArch64::ZAB0},
+    {AArch64::ZAH0, AArch64::ZAH1},
+    {AArch64::ZAS0, AArch64::ZAS1, AArch64::ZAS2, AArch64::ZAS3},
+    {AArch64::ZAD0, AArch64::ZAD1, AArch64::ZAD2, AArch64::ZAD3,
+     AArch64::ZAD4, AArch64::ZAD5, AArch64::ZAD6, AArch64::ZAD7},
+    {AArch64::ZAQ0, AArch64::ZAQ1, AArch64::ZAQ2, AArch64::ZAQ3,
+     AArch64::ZAQ4, AArch64::ZAQ5, AArch64::ZAQ6, AArch64::ZAQ7,
+     AArch64::ZAQ8, AArch64::ZAQ9, AArch64::ZAQ10, AArch64::ZAQ11,
+     AArch64::ZAQ12, AArch64::ZAQ13, AArch64::ZAQ14, AArch64::ZAQ15}};
+
+/// Decode the tile number \p RegNo of a tile encoded in \p NumBitsForTile
+/// bits and append the matching register to \p Inst. Returns Fail when
+/// \p RegNo does not fit in \p NumBitsForTile bits.
+template <unsigned NumBitsForTile>
+static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
+                                     uint64_t Address, const void *Decoder) {
+  static_assert(NumBitsForTile < 5,
+                "NumBitsForTile must select a row of MatrixZATileDecoderTable");
+  unsigned LastReg = (1 << NumBitsForTile) - 1;
+  if (RegNo > LastReg)
+    return Fail;
+  Inst.addOperand(
+      MCOperand::createReg(MatrixZATileDecoderTable[NumBitsForTile][RegNo]));
+  return Success;
+}
+
static const unsigned PPRDecoderTable[] = {
AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3,
AArch64::P4, AArch64::P5, AArch64::P6, AArch64::P7,
return true;
}
+/// Print the register name of operand \p OpNum followed by the element
+/// suffix selected by \p EltSize (no suffix when \p EltSize is 0).
+template <int EltSize>
+void AArch64InstPrinter::printMatrix(const MCInst *MI, unsigned OpNum,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  const MCOperand &Operand = MI->getOperand(OpNum);
+  assert(Operand.isReg() && "Unexpected operand type!");
+
+  O << getRegisterName(Operand.getReg());
+
+  // Pick the suffix first, then emit it in one place.
+  const char *Suffix = "";
+  switch (EltSize) {
+  case 0:
+    break;
+  case 8:
+    Suffix = ".b";
+    break;
+  case 16:
+    Suffix = ".h";
+    break;
+  case 32:
+    Suffix = ".s";
+    break;
+  case 64:
+    Suffix = ".d";
+    break;
+  case 128:
+    Suffix = ".q";
+    break;
+  default:
+    llvm_unreachable("Unsupported element size");
+  }
+  O << Suffix;
+}
+
+/// Print the register name of operand \p OpNum with a 'v' (vertical) or 'h'
+/// (horizontal) marker inserted in front of the '.' that starts the suffix.
+template <bool IsVertical>
+void AArch64InstPrinter::printMatrixTileVector(const MCInst *MI, unsigned OpNum,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  const MCOperand &Operand = MI->getOperand(OpNum);
+  assert(Operand.isReg() && "Unexpected operand type!");
+
+  // Split the name at the first '.': first = base name, second = suffix.
+  const auto NameParts =
+      StringRef(getRegisterName(Operand.getReg())).split('.');
+  const char *Direction = IsVertical ? "v" : "h";
+  O << NameParts.first << Direction << '.' << NameParts.second;
+}
+
+/// Print the register name of the tile register held in operand \p OpNum.
+void AArch64InstPrinter::printMatrixTile(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  const MCOperand &Operand = MI->getOperand(OpNum);
+  assert(Operand.isReg() && "Unexpected operand type!");
+  const unsigned TileReg = Operand.getReg();
+  O << getRegisterName(TileReg);
+}
+
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCSubtargetInfo &STI, raw_ostream &O);
void printSVEPattern(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+
+ template <bool IsVertical>
+ void printMatrixTileVector(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMatrixTile(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <int EltSize>
+ void printMatrix(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O);
template <char = 0>
void printSVERegOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
--- /dev/null
+//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SME Add Vector to Tile
+//===----------------------------------------------------------------------===//
+
+// Common encoding for the "add vector to tile" instructions.
+//   op       - encoded in bit 22.
+//   V        - encoded in bit 16.
+//   tile_ty  - operand type of the destination tile $ZAda.
+//   zpr_ty   - operand type of the source vector $Zn.
+//   mnemonic - assembly mnemonic.
+// Bits 4-3 are fixed to zero here; bits 2-0 are left for subclasses to set.
+class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
+ ZPRRegOp zpr_ty, string mnemonic>
+ : I<(outs tile_ty:$ZAda),
+ (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
+ mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
+ "", []>, Sched<[]> {
+ bits<3> Pm;
+ bits<3> Pn;
+ bits<5> Zn;
+ let Inst{31-23} = 0b110000001;
+ let Inst{22} = op;
+ let Inst{21-17} = 0b01000;
+ let Inst{16} = V;
+ // Note the field order: $Pm is encoded above $Pn although $Pn is written
+ // first in the assembly string.
+ let Inst{15-13} = Pm;
+ let Inst{12-10} = Pn;
+ let Inst{9-5} = Zn;
+ let Inst{4-3} = 0b00;
+}
+
+// Add-vector-to-tile form using TileOp32 and ZPR32 operands (op = 0).
+// The tile number takes two bits (1-0); bit 2 is fixed to zero.
+class sme_add_vector_to_tile_u32<bit V, string mnemonic>
+ : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
+ bits<2> ZAda;
+ let Inst{2} = 0b0;
+ let Inst{1-0} = ZAda;
+}
+
+// Add-vector-to-tile form using TileOp64 and ZPR64 operands (op = 1).
+// The tile number takes three bits (2-0).
+class sme_add_vector_to_tile_u64<bit V, string mnemonic>
+ : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
+ bits<3> ZAda;
+ let Inst{2-0} = ZAda;
+}
--- /dev/null
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid tile
+
+addha za4.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za4.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za8.d, p0/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za8.d, p0/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0h.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0h.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0v.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0v.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0p.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0p.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+addha za0.s, p8/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.s, p8/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0.s, p0/m, p8/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.s, p0/m, p8/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0.d, p8/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.d, p8/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0.d, p0/m, p8/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.d, p0/m, p8/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- /dev/null
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
+// RUN: | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+addha za0.s, p0/m, p0/m, z0.s
+// CHECK-INST: addha za0.s, p0/m, p0/m, z0.s
+// CHECK-ENCODING: [0x00,0x00,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 00 00 90 c0 <unknown>
+
+addha za1.s, p5/m, p2/m, z10.s
+// CHECK-INST: addha za1.s, p5/m, p2/m, z10.s
+// CHECK-ENCODING: [0x41,0x55,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 41 55 90 c0 <unknown>
+
+addha za3.s, p3/m, p7/m, z13.s
+// CHECK-INST: addha za3.s, p3/m, p7/m, z13.s
+// CHECK-ENCODING: [0xa3,0xed,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: a3 ed 90 c0 <unknown>
+
+addha za3.s, p7/m, p7/m, z31.s
+// CHECK-INST: addha za3.s, p7/m, p7/m, z31.s
+// CHECK-ENCODING: [0xe3,0xff,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: e3 ff 90 c0 <unknown>
+
+addha za1.s, p3/m, p0/m, z17.s
+// CHECK-INST: addha za1.s, p3/m, p0/m, z17.s
+// CHECK-ENCODING: [0x21,0x0e,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 0e 90 c0 <unknown>
+
+addha za1.s, p1/m, p4/m, z1.s
+// CHECK-INST: addha za1.s, p1/m, p4/m, z1.s
+// CHECK-ENCODING: [0x21,0x84,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 84 90 c0 <unknown>
+
+addha za0.s, p5/m, p2/m, z19.s
+// CHECK-INST: addha za0.s, p5/m, p2/m, z19.s
+// CHECK-ENCODING: [0x60,0x56,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 60 56 90 c0 <unknown>
+
+addha za0.s, p6/m, p0/m, z12.s
+// CHECK-INST: addha za0.s, p6/m, p0/m, z12.s
+// CHECK-ENCODING: [0x80,0x19,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 80 19 90 c0 <unknown>
+
+addha za1.s, p2/m, p6/m, z1.s
+// CHECK-INST: addha za1.s, p2/m, p6/m, z1.s
+// CHECK-ENCODING: [0x21,0xc8,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 c8 90 c0 <unknown>
+
+addha za1.s, p2/m, p0/m, z22.s
+// CHECK-INST: addha za1.s, p2/m, p0/m, z22.s
+// CHECK-ENCODING: [0xc1,0x0a,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: c1 0a 90 c0 <unknown>
+
+addha za2.s, p5/m, p7/m, z9.s
+// CHECK-INST: addha za2.s, p5/m, p7/m, z9.s
+// CHECK-ENCODING: [0x22,0xf5,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 22 f5 90 c0 <unknown>
+
+addha za3.s, p2/m, p5/m, z12.s
+// CHECK-INST: addha za3.s, p2/m, p5/m, z12.s
+// CHECK-ENCODING: [0x83,0xa9,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 83 a9 90 c0 <unknown>
--- /dev/null
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
+// RUN: | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+addha za0.d, p0/m, p0/m, z0.d
+// CHECK-INST: addha za0.d, p0/m, p0/m, z0.d
+// CHECK-ENCODING: [0x00,0x00,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 00 00 d0 c0 <unknown>
+
+addha za5.d, p5/m, p2/m, z10.d
+// CHECK-INST: addha za5.d, p5/m, p2/m, z10.d
+// CHECK-ENCODING: [0x45,0x55,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 45 55 d0 c0 <unknown>
+
+addha za7.d, p3/m, p7/m, z13.d
+// CHECK-INST: addha za7.d, p3/m, p7/m, z13.d
+// CHECK-ENCODING: [0xa7,0xed,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: a7 ed d0 c0 <unknown>
+
+addha za7.d, p7/m, p7/m, z31.d
+// CHECK-INST: addha za7.d, p7/m, p7/m, z31.d
+// CHECK-ENCODING: [0xe7,0xff,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: e7 ff d0 c0 <unknown>
+
+addha za5.d, p3/m, p0/m, z17.d
+// CHECK-INST: addha za5.d, p3/m, p0/m, z17.d
+// CHECK-ENCODING: [0x25,0x0e,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 25 0e d0 c0 <unknown>
+
+addha za1.d, p1/m, p4/m, z1.d
+// CHECK-INST: addha za1.d, p1/m, p4/m, z1.d
+// CHECK-ENCODING: [0x21,0x84,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 84 d0 c0 <unknown>
+
+addha za0.d, p5/m, p2/m, z19.d
+// CHECK-INST: addha za0.d, p5/m, p2/m, z19.d
+// CHECK-ENCODING: [0x60,0x56,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 60 56 d0 c0 <unknown>
+
+addha za0.d, p6/m, p0/m, z12.d
+// CHECK-INST: addha za0.d, p6/m, p0/m, z12.d
+// CHECK-ENCODING: [0x80,0x19,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 80 19 d0 c0 <unknown>
+
+addha za1.d, p2/m, p6/m, z1.d
+// CHECK-INST: addha za1.d, p2/m, p6/m, z1.d
+// CHECK-ENCODING: [0x21,0xc8,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 c8 d0 c0 <unknown>
+
+addha za5.d, p2/m, p0/m, z22.d
+// CHECK-INST: addha za5.d, p2/m, p0/m, z22.d
+// CHECK-ENCODING: [0xc5,0x0a,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: c5 0a d0 c0 <unknown>
+
+addha za2.d, p5/m, p7/m, z9.d
+// CHECK-INST: addha za2.d, p5/m, p7/m, z9.d
+// CHECK-ENCODING: [0x22,0xf5,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 22 f5 d0 c0 <unknown>
+
+addha za7.d, p2/m, p5/m, z12.d
+// CHECK-INST: addha za7.d, p2/m, p5/m, z12.d
+// CHECK-ENCODING: [0x87,0xa9,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 87 a9 d0 c0 <unknown>
--- /dev/null
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid tile
+
+addva za4.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addva za4.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addva za8.d, p0/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addva za8.d, p0/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+addva za0.s, p8/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addva za0.s, p8/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addva za0.s, p0/m, p8/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addva za0.s, p0/m, p8/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addva za0.d, p8/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addva za0.d, p8/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addva za0.d, p0/m, p8/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addva za0.d, p0/m, p8/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- /dev/null
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
+// RUN: | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble the emitted encoding and check that the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+addva za0.s, p0/m, p0/m, z0.s
+// CHECK-INST: addva za0.s, p0/m, p0/m, z0.s
+// CHECK-ENCODING: [0x00,0x00,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 00 00 91 c0 <unknown>
+
+addva za1.s, p5/m, p2/m, z10.s
+// CHECK-INST: addva za1.s, p5/m, p2/m, z10.s
+// CHECK-ENCODING: [0x41,0x55,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 41 55 91 c0 <unknown>
+
+addva za3.s, p3/m, p7/m, z13.s
+// CHECK-INST: addva za3.s, p3/m, p7/m, z13.s
+// CHECK-ENCODING: [0xa3,0xed,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: a3 ed 91 c0 <unknown>
+
+addva za3.s, p7/m, p7/m, z31.s
+// CHECK-INST: addva za3.s, p7/m, p7/m, z31.s
+// CHECK-ENCODING: [0xe3,0xff,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: e3 ff 91 c0 <unknown>
+
+addva za1.s, p3/m, p0/m, z17.s
+// CHECK-INST: addva za1.s, p3/m, p0/m, z17.s
+// CHECK-ENCODING: [0x21,0x0e,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 0e 91 c0 <unknown>
+
+addva za1.s, p1/m, p4/m, z1.s
+// CHECK-INST: addva za1.s, p1/m, p4/m, z1.s
+// CHECK-ENCODING: [0x21,0x84,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 84 91 c0 <unknown>
+
+addva za0.s, p5/m, p2/m, z19.s
+// CHECK-INST: addva za0.s, p5/m, p2/m, z19.s
+// CHECK-ENCODING: [0x60,0x56,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 60 56 91 c0 <unknown>
+
+addva za0.s, p6/m, p0/m, z12.s
+// CHECK-INST: addva za0.s, p6/m, p0/m, z12.s
+// CHECK-ENCODING: [0x80,0x19,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 80 19 91 c0 <unknown>
+
+addva za1.s, p2/m, p6/m, z1.s
+// CHECK-INST: addva za1.s, p2/m, p6/m, z1.s
+// CHECK-ENCODING: [0x21,0xc8,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 c8 91 c0 <unknown>
+
+addva za1.s, p2/m, p0/m, z22.s
+// CHECK-INST: addva za1.s, p2/m, p0/m, z22.s
+// CHECK-ENCODING: [0xc1,0x0a,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: c1 0a 91 c0 <unknown>
+
+addva za2.s, p5/m, p7/m, z9.s
+// CHECK-INST: addva za2.s, p5/m, p7/m, z9.s
+// CHECK-ENCODING: [0x22,0xf5,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 22 f5 91 c0 <unknown>
+
+addva za3.s, p2/m, p5/m, z12.s
+// CHECK-INST: addva za3.s, p2/m, p5/m, z12.s
+// CHECK-ENCODING: [0x83,0xa9,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 83 a9 91 c0 <unknown>
--- /dev/null
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
+// RUN: | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble the emitted encoding and check that the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+addva za0.d, p0/m, p0/m, z0.d
+// CHECK-INST: addva za0.d, p0/m, p0/m, z0.d
+// CHECK-ENCODING: [0x00,0x00,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 00 00 d1 c0 <unknown>
+
+addva za5.d, p5/m, p2/m, z10.d
+// CHECK-INST: addva za5.d, p5/m, p2/m, z10.d
+// CHECK-ENCODING: [0x45,0x55,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 45 55 d1 c0 <unknown>
+
+addva za7.d, p3/m, p7/m, z13.d
+// CHECK-INST: addva za7.d, p3/m, p7/m, z13.d
+// CHECK-ENCODING: [0xa7,0xed,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: a7 ed d1 c0 <unknown>
+
+addva za7.d, p7/m, p7/m, z31.d
+// CHECK-INST: addva za7.d, p7/m, p7/m, z31.d
+// CHECK-ENCODING: [0xe7,0xff,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: e7 ff d1 c0 <unknown>
+
+addva za5.d, p3/m, p0/m, z17.d
+// CHECK-INST: addva za5.d, p3/m, p0/m, z17.d
+// CHECK-ENCODING: [0x25,0x0e,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 25 0e d1 c0 <unknown>
+
+addva za1.d, p1/m, p4/m, z1.d
+// CHECK-INST: addva za1.d, p1/m, p4/m, z1.d
+// CHECK-ENCODING: [0x21,0x84,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 84 d1 c0 <unknown>
+
+addva za0.d, p5/m, p2/m, z19.d
+// CHECK-INST: addva za0.d, p5/m, p2/m, z19.d
+// CHECK-ENCODING: [0x60,0x56,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 60 56 d1 c0 <unknown>
+
+addva za0.d, p6/m, p0/m, z12.d
+// CHECK-INST: addva za0.d, p6/m, p0/m, z12.d
+// CHECK-ENCODING: [0x80,0x19,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 80 19 d1 c0 <unknown>
+
+addva za1.d, p2/m, p6/m, z1.d
+// CHECK-INST: addva za1.d, p2/m, p6/m, z1.d
+// CHECK-ENCODING: [0x21,0xc8,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 c8 d1 c0 <unknown>
+
+addva za5.d, p2/m, p0/m, z22.d
+// CHECK-INST: addva za5.d, p2/m, p0/m, z22.d
+// CHECK-ENCODING: [0xc5,0x0a,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: c5 0a d1 c0 <unknown>
+
+addva za2.d, p5/m, p7/m, z9.d
+// CHECK-INST: addva za2.d, p5/m, p7/m, z9.d
+// CHECK-ENCODING: [0x22,0xf5,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 22 f5 d1 c0 <unknown>
+
+addva za7.d, p2/m, p5/m, z12.d
+// CHECK-INST: addva za7.d, p2/m, p5/m, z12.d
+// CHECK-ENCODING: [0x87,0xa9,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 87 a9 d1 c0 <unknown>
add_llvm_target_unittest(AArch64Tests
InstSizes.cpp
DecomposeStackOffsetTest.cpp
+ MatrixRegisterAliasing.cpp
)
--- /dev/null
+#include "AArch64Subtarget.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+// Builds a target machine for the normalized "aarch64--" triple, the
+// "generic" CPU and the "+sme" feature string.
+//
+// Returns a null pointer when the registry has no target for that triple, so
+// the caller can assert on the result instead of crashing.
+std::unique_ptr<LLVMTargetMachine> createTargetMachine() {
+  auto TT(Triple::normalize("aarch64--"));
+  std::string CPU("generic");
+  std::string FS("+sme");
+
+  // The AArch64 backend has to be registered before the registry is queried.
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64Target();
+  LLVMInitializeAArch64TargetMC();
+
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
+  // A failed lookup yields a null target; do not dereference it.
+  if (!TheTarget)
+    return nullptr;
+
+  return std::unique_ptr<LLVMTargetMachine>(
+      static_cast<LLVMTargetMachine *>(TheTarget->createTargetMachine(
+          TT, CPU, FS, TargetOptions(), None, None, CodeGenOpt::Default)));
+}
+
+// Creates an AArch64InstrInfo from a subtarget that copies the triple, CPU
+// name and feature string of TM.
+//
+// NOTE(review): the subtarget is a local that is destroyed on return; confirm
+// that the returned AArch64InstrInfo is only used in ways that do not need
+// the subtarget to stay alive.
+std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
+  const std::string CPUName = std::string(TM->getTargetCPU());
+  const std::string FeatureString = std::string(TM->getTargetFeatureString());
+
+  AArch64Subtarget Subtarget(TM->getTargetTriple(), CPUName, FeatureString,
+                             *TM, /* isLittle */ false);
+  return std::make_unique<AArch64InstrInfo>(Subtarget);
+}
+
+// Checks which SME ZA tile registers the AArch64 register info reports as
+// overlapping and which it reports as disjoint.
+TEST(MatrixRegisterAliasing, Aliasing) {
+  std::unique_ptr<LLVMTargetMachine> Machine = createTargetMachine();
+  ASSERT_TRUE(Machine);
+  std::unique_ptr<AArch64InstrInfo> InstrInfo = createInstrInfo(Machine.get());
+
+  const AArch64RegisterInfo &RegInfo = InstrInfo->getRegisterInfo();
+
+  // Register groups used by the checks below.
+  const unsigned SampleTiles[] = {AArch64::ZAQ0, AArch64::ZAQ15, AArch64::ZAD0,
+                                  AArch64::ZAD7, AArch64::ZAS0,  AArch64::ZAS3,
+                                  AArch64::ZAH0, AArch64::ZAH1};
+  const unsigned EvenQ[] = {AArch64::ZAQ0,  AArch64::ZAQ2,  AArch64::ZAQ4,
+                            AArch64::ZAQ6,  AArch64::ZAQ8,  AArch64::ZAQ10,
+                            AArch64::ZAQ12, AArch64::ZAQ14};
+  const unsigned OddQ[] = {AArch64::ZAQ1,  AArch64::ZAQ3,  AArch64::ZAQ5,
+                           AArch64::ZAQ7,  AArch64::ZAQ9,  AArch64::ZAQ11,
+                           AArch64::ZAQ13, AArch64::ZAQ15};
+  const unsigned QOfS0[] = {AArch64::ZAQ0, AArch64::ZAQ4, AArch64::ZAQ8,
+                            AArch64::ZAQ12};
+  const unsigned QOfS1[] = {AArch64::ZAQ1, AArch64::ZAQ5, AArch64::ZAQ9,
+                            AArch64::ZAQ13};
+  const unsigned QOfD0[] = {AArch64::ZAQ0, AArch64::ZAQ8};
+  const unsigned QOfD1[] = {AArch64::ZAQ1, AArch64::ZAQ9};
+
+  // za overlaps with za0.b
+  ASSERT_TRUE(RegInfo.regsOverlap(AArch64::ZA, AArch64::ZAB0));
+
+  // za0.b overlaps with tiles of every element size
+  for (unsigned Tile : SampleTiles)
+    ASSERT_TRUE(RegInfo.regsOverlap(AArch64::ZAB0, Tile));
+
+  // za0.h overlaps the even-numbered .q tiles
+  for (unsigned Q : EvenQ)
+    ASSERT_TRUE(RegInfo.regsOverlap(AArch64::ZAH0, Q));
+
+  // za1.h overlaps the odd-numbered .q tiles
+  for (unsigned Q : OddQ)
+    ASSERT_TRUE(RegInfo.regsOverlap(AArch64::ZAH1, Q));
+
+  // za1.h is disjoint from the even-numbered .q tiles
+  for (unsigned Q : EvenQ)
+    ASSERT_FALSE(RegInfo.regsOverlap(AArch64::ZAH1, Q));
+
+  // za0.h is disjoint from the odd-numbered .q tiles
+  for (unsigned Q : OddQ)
+    ASSERT_FALSE(RegInfo.regsOverlap(AArch64::ZAH0, Q));
+
+  // za0.s overlaps za0.q, za4.q, za8.q, za12.q
+  for (unsigned Q : QOfS0)
+    ASSERT_TRUE(RegInfo.regsOverlap(AArch64::ZAS0, Q));
+
+  // za1.s overlaps za1.q, za5.q, za9.q, za13.q
+  for (unsigned Q : QOfS1)
+    ASSERT_TRUE(RegInfo.regsOverlap(AArch64::ZAS1, Q));
+
+  // za0.s is disjoint from za1.q, za5.q, za9.q, za13.q
+  for (unsigned Q : QOfS1)
+    ASSERT_FALSE(RegInfo.regsOverlap(AArch64::ZAS0, Q));
+
+  // za1.s is disjoint from za0.q, za4.q, za8.q, za12.q
+  for (unsigned Q : QOfS0)
+    ASSERT_FALSE(RegInfo.regsOverlap(AArch64::ZAS1, Q));
+
+  // za0.d overlaps za0.q and za8.q
+  for (unsigned Q : QOfD0)
+    ASSERT_TRUE(RegInfo.regsOverlap(AArch64::ZAD0, Q));
+
+  // za1.d overlaps za1.q and za9.q
+  for (unsigned Q : QOfD1)
+    ASSERT_TRUE(RegInfo.regsOverlap(AArch64::ZAD1, Q));
+
+  // za0.d is disjoint from za1.q and za9.q
+  for (unsigned Q : QOfD1)
+    ASSERT_FALSE(RegInfo.regsOverlap(AArch64::ZAD0, Q));
+
+  // za1.d is disjoint from za0.q and za8.q
+  for (unsigned Q : QOfD0)
+    ASSERT_FALSE(RegInfo.regsOverlap(AArch64::ZAD1, Q));
+}
+
+} // end anonymous namespace