[AMDGPU] Support shared literals in FMAMK/FMAAK

author Joe Nash <Joseph.Nash@amd.com>

Mon, 4 Oct 2021 14:56:30 +0000 (10:56 -0400)

committer Joe Nash <Joseph.Nash@amd.com>

Mon, 11 Oct 2021 17:09:54 +0000 (13:09 -0400)
author Joe Nash <Joseph.Nash@amd.com>
Mon, 4 Oct 2021 14:56:30 +0000 (10:56 -0400)
committer Joe Nash <Joseph.Nash@amd.com>
Mon, 11 Oct 2021 17:09:54 +0000 (13:09 -0400)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

index 856bb26c4fee0c5b49922aecdd7e959a027cc76b..4acd77a9d5d202097aa92355ad95d307e30f5aa1 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1542,7 +1542,7 @@ private:
    bool validateOpSel(const MCInst &Inst);
    bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
    bool validateVccOperand(unsigned Reg) const;
-  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
+  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
    bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
    bool validateAGPRLdSt(const MCInst &Inst) const;
    bool validateVGPRAlign(const MCInst &Inst) const;
@@ -1715,6 +1715,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
    switch (OperandType) {
    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -1723,6 +1724,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32:
+  case AMDGPU::OPERAND_KIMM32:
      return &APFloat::IEEEsingle();
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -1732,6 +1734,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
      return &APFloat::IEEEdouble();
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
@@ -1742,6 +1745,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_KIMM16:
      return &APFloat::IEEEhalf();
    default:
      llvm_unreachable("unsupported fp type");
@@ -2017,12 +2021,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
  
      case AMDGPU::OPERAND_REG_IMM_INT32:
      case AMDGPU::OPERAND_REG_IMM_FP32:
+    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
      case AMDGPU::OPERAND_REG_INLINE_C_INT32:
      case AMDGPU::OPERAND_REG_INLINE_C_FP32:
      case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
      case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
      case AMDGPU::OPERAND_REG_IMM_INT16:
      case AMDGPU::OPERAND_REG_IMM_FP16:
+    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
      case AMDGPU::OPERAND_REG_INLINE_C_INT16:
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
@@ -2036,7 +2042,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
      case AMDGPU::OPERAND_REG_IMM_V2FP32:
      case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
-    case AMDGPU::OPERAND_REG_IMM_V2INT32: {
+    case AMDGPU::OPERAND_REG_IMM_V2INT32:
+    case AMDGPU::OPERAND_KIMM32:
+    case AMDGPU::OPERAND_KIMM16: {
        bool lost;
        APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
        // Convert literal to single precision
@@ -2062,6 +2070,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -2101,6 +2110,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
  
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
@@ -2128,6 +2138,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }
+  case AMDGPU::OPERAND_KIMM32:
+    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
+    setImmKindNone();
+    return;
+  case AMDGPU::OPERAND_KIMM16:
+    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
+    setImmKindNone();
+    return;
    default:
      llvm_unreachable("invalid operand size");
    }
@@ -3250,7 +3268,8 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
         SIInstrFlags::SDWA)) {
      // Check special imm operands (used by madmk, etc)
      if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
-      ++ConstantBusUseCount;
+      ++NumLiterals;
+      LiteralSize = 4;
      }
  
      SmallDenseSet<unsigned> SGPRsUsed;
@@ -3290,7 +3309,7 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
  
            // An instruction may use only one literal.
            // This has been validated on the previous step.
-          // See validateVOP3Literal.
+          // See validateVOPLiteral.
            // This literal may be used as more than one operand.
            // If all these operands are of the same size,
            // this literal counts as one scalar value.
@@ -3981,26 +4000,29 @@ bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
      (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
  }
  
-// VOP3 literal is only allowed in GFX10+ and only one can be used
-bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
-                                          const OperandVector &Operands) {
+// One unique literal can be used. VOP3 literal is only allowed in GFX10+
+bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
+                                         const OperandVector &Operands) {
    unsigned Opcode = Inst.getOpcode();
    const MCInstrDesc &Desc = MII.get(Opcode);
-  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
+  const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
+  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
+      ImmIdx == -1)
      return true;
  
    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
  
-  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+  const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
  
    unsigned NumExprs = 0;
    unsigned NumLiterals = 0;
    uint32_t LiteralValue;
  
    for (int OpIdx : OpIndices) {
-    if (OpIdx == -1) break;
+    if (OpIdx == -1)
+      continue;
  
      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (!MO.isImm() && !MO.isExpr())
@@ -4030,7 +4052,7 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
    if (!NumLiterals)
      return true;
  
-  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
+  if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
      Error(getLitLoc(Operands), "literal operands are not supported");
      return false;
    }
@@ -4202,7 +4224,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
        "only one literal operand is allowed");
      return false;
    }
-  if (!validateVOP3Literal(Inst, Operands)) {
+  if (!validateVOPLiteral(Inst, Operands)) {
      return false;
    }
    if (!validateConstantBusLimitations(Inst, Operands)) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

index d702b1abe4b3faf62968f1ce5e3deb8b0b16ab1e..e2186d4d533e50f93253a5d3d1ca7964d8273633 100644 (file)
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -26,6 +26,7 @@
  #include "llvm/MC/MCExpr.h"
  #include "llvm/MC/MCFixedLenDisassembler.h"
  #include "llvm/MC/TargetRegistry.h"
+#include "llvm/MC/MCInstrDesc.h"
  #include "llvm/Support/AMDHSAKernelDescriptor.h"
  
  using namespace llvm;
@@ -264,6 +265,34 @@ static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst,
    return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
  }
  
+static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
+                                          uint64_t Addr, const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
+}
+
+static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
+                                          uint64_t Addr, const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
+}
+
+static DecodeStatus decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm,
+                                                 uint64_t Addr,
+                                                 const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(
+      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
+}
+
+static DecodeStatus decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm,
+                                                 uint64_t Addr,
+                                                 const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(
+      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
+}
+
  static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                            const MCRegisterInfo *MRI) {
    if (OpIdx < 0)
@@ -626,6 +655,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
      }
    }
  
+  int ImmLitIdx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
+  if (Res && ImmLitIdx != -1)
+    Res = convertFMAanyK(MI, ImmLitIdx);
+
    // if the opcode was not recognized we'll assume a Size of 4 bytes
    // (unless there are fewer bytes left)
    Size = Res ? (MaxInstBytesNum - Bytes.size())
@@ -810,6 +844,24 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
    return MCDisassembler::Success;
  }
  
+DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
+                                                int ImmLitIdx) const {
+  assert(HasLiteral && "Should have decoded a literal");
+  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+  unsigned DescNumOps = Desc.getNumOperands();
+  assert(DescNumOps == MI.getNumOperands());
+  for (unsigned I = 0; I < DescNumOps; ++I) {
+    auto &Op = MI.getOperand(I);
+    auto OpType = Desc.OpInfo[I].OperandType;
+    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
+                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
+    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
+        IsDeferredOp)
+      Op.setImm(Literal);
+  }
+  return MCDisassembler::Success;
+}
+
  const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
    return getContext().getRegisterInfo()->
      getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
@@ -1019,6 +1071,18 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
    return decodeDstOp(OPW512, Val);
  }
  
+// Decode Literals for insts which always have a literal in the encoding
+MCOperand
+AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
+  if (HasLiteral) {
+    if (Literal != Val)
+      return errOperand(Val, "More than one unique literal is illegal");
+  }
+  HasLiteral = true;
+  Literal = Val;
+  return MCOperand::createImm(Literal);
+}
+
  MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
    // For now all literal constants are supposed to be unsigned integer
    // ToDo: deal with signed/unsigned 64-bit integer constants
@@ -1232,7 +1296,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
    return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
  }
  
-MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
+MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
+                                          bool MandatoryLiteral) const {
    using namespace AMDGPU::EncValues;
  
    assert(Val < 1024); // enum10
@@ -1261,8 +1326,13 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
    if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, Val);
  
-  if (Val == LITERAL_CONST)
-    return decodeLiteralConstant();
+  if (Val == LITERAL_CONST) {
+    if (MandatoryLiteral)
+      // Keep a sentinel value for deferred setting
+      return MCOperand::createImm(LITERAL_CONST);
+    else
+      return decodeLiteralConstant();
+  }
  
    switch (Width) {
    case OPW32:
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

index dc879ec5ad8833f1dd53d77fc9e24a95bd9dc1fe..eea6074d52814e3984cfa5c03ae88d5ceb378f1c 100644 (file)
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -87,6 +87,7 @@ public:
    DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
                                         raw_string_ostream &KdStream) const;
  
+  DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
    DecodeStatus convertSDWAInst(MCInst &MI) const;
    DecodeStatus convertDPP8Inst(MCInst &MI) const;
    DecodeStatus convertMIMGInst(MCInst &MI) const;
@@ -150,9 +151,11 @@ public:
  
    static MCOperand decodeIntImmed(unsigned Imm);
    static MCOperand decodeFPImmed(OpWidthTy Width, unsigned Imm);
+  MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
    MCOperand decodeLiteralConstant() const;
  
-  MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
+  MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
+                        bool MandatoryLiteral = false) const;
    MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
    MCOperand decodeSpecialReg32(unsigned Val) const;
    MCOperand decodeSpecialReg64(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

index ab340204ad668f1487c8fafef4dcb729772d7880..b68b4b12e750be1919b19335ed692adf858a3b5a 100644 (file)
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -605,6 +605,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
      switch (OpTy) {
      case AMDGPU::OPERAND_REG_IMM_INT32:
      case AMDGPU::OPERAND_REG_IMM_FP32:
+    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
      case AMDGPU::OPERAND_REG_INLINE_C_INT32:
      case AMDGPU::OPERAND_REG_INLINE_C_FP32:
      case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -631,6 +632,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
      case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
      case AMDGPU::OPERAND_REG_IMM_FP16:
+    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
        printImmediate16(Op.getImm(), STI, O);
        break;
      case AMDGPU::OPERAND_REG_IMM_V2INT16:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp

index dbce4b2e872c05ae68008ecd618eff43b4452f2e..41196054c4825bb65e671d01e07b402ff50b7b90 100644 (file)
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -233,6 +233,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
    switch (OpInfo.OperandType) {
    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -255,6 +256,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
      return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
    case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
      // FIXME Is this correct? What do inline immediates do on SI for f16 src
@@ -277,6 +279,9 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
      uint32_t Encoding = getLit16Encoding(Lo16, STI);
      return Encoding;
    }
+  case AMDGPU::OPERAND_KIMM32:
+  case AMDGPU::OPERAND_KIMM16:
+    return MO.getImm();
    default:
      llvm_unreachable("invalid operand size");
    }
@@ -341,7 +346,13 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
        (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
      return;
  
-  // Check for additional literals in SRC0/1/2 (Op 1/2/3)
+  // Do not print literals from SISrc Operands for insts with mandatory literals
+  int ImmLitIdx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
+  if (ImmLitIdx != -1)
+    return;
+
+  // Check for additional literals
    for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {
  
      // Check if this operand should be encoded as [SV]Src
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h

index 247ebe3fe741e5f0c0b9174d68c36414aecbc3aa..777744f08cde6f384e43e58d498b9066a5d8c507 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -139,64 +139,67 @@ enum ClassFlags : unsigned {
  }
  
  namespace AMDGPU {
-  enum OperandType : unsigned {
-    /// Operands with register or 32-bit immediate
-    OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
-    OPERAND_REG_IMM_INT64,
-    OPERAND_REG_IMM_INT16,
-    OPERAND_REG_IMM_FP32,
-    OPERAND_REG_IMM_FP64,
-    OPERAND_REG_IMM_FP16,
-    OPERAND_REG_IMM_V2FP16,
-    OPERAND_REG_IMM_V2INT16,
-    OPERAND_REG_IMM_V2INT32,
-    OPERAND_REG_IMM_V2FP32,
-
-    /// Operands with register or inline constant
-    OPERAND_REG_INLINE_C_INT16,
-    OPERAND_REG_INLINE_C_INT32,
-    OPERAND_REG_INLINE_C_INT64,
-    OPERAND_REG_INLINE_C_FP16,
-    OPERAND_REG_INLINE_C_FP32,
-    OPERAND_REG_INLINE_C_FP64,
-    OPERAND_REG_INLINE_C_V2INT16,
-    OPERAND_REG_INLINE_C_V2FP16,
-    OPERAND_REG_INLINE_C_V2INT32,
-    OPERAND_REG_INLINE_C_V2FP32,
-
-    /// Operands with an AccVGPR register or inline constant
-    OPERAND_REG_INLINE_AC_INT16,
-    OPERAND_REG_INLINE_AC_INT32,
-    OPERAND_REG_INLINE_AC_FP16,
-    OPERAND_REG_INLINE_AC_FP32,
-    OPERAND_REG_INLINE_AC_FP64,
-    OPERAND_REG_INLINE_AC_V2INT16,
-    OPERAND_REG_INLINE_AC_V2FP16,
-    OPERAND_REG_INLINE_AC_V2INT32,
-    OPERAND_REG_INLINE_AC_V2FP32,
-
-    OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
-    OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
-
-    OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
-    OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32,
-
-    OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
-    OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32,
-
-    OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
-    OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
-
-    // Operand for source modifiers for VOP instructions
-    OPERAND_INPUT_MODS,
-
-    // Operand for SDWA instructions
-    OPERAND_SDWA_VOPC_DST,
-
-    /// Operand with 32-bit immediate that uses the constant bus.
-    OPERAND_KIMM32,
-    OPERAND_KIMM16
-  };
+enum OperandType : unsigned {
+  /// Operands with register or 32-bit immediate
+  OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
+  OPERAND_REG_IMM_INT64,
+  OPERAND_REG_IMM_INT16,
+  OPERAND_REG_IMM_FP32,
+  OPERAND_REG_IMM_FP64,
+  OPERAND_REG_IMM_FP16,
+  OPERAND_REG_IMM_FP16_DEFERRED,
+  OPERAND_REG_IMM_FP32_DEFERRED,
+  OPERAND_REG_IMM_V2FP16,
+  OPERAND_REG_IMM_V2INT16,
+  OPERAND_REG_IMM_V2INT32,
+  OPERAND_REG_IMM_V2FP32,
+
+  /// Operands with register or inline constant
+  OPERAND_REG_INLINE_C_INT16,
+  OPERAND_REG_INLINE_C_INT32,
+  OPERAND_REG_INLINE_C_INT64,
+  OPERAND_REG_INLINE_C_FP16,
+  OPERAND_REG_INLINE_C_FP32,
+  OPERAND_REG_INLINE_C_FP64,
+  OPERAND_REG_INLINE_C_V2INT16,
+  OPERAND_REG_INLINE_C_V2FP16,
+  OPERAND_REG_INLINE_C_V2INT32,
+  OPERAND_REG_INLINE_C_V2FP32,
+
+  /// Operand with 32-bit immediate that uses the constant bus.
+  OPERAND_KIMM32,
+  OPERAND_KIMM16,
+
+  /// Operands with an AccVGPR register or inline constant
+  OPERAND_REG_INLINE_AC_INT16,
+  OPERAND_REG_INLINE_AC_INT32,
+  OPERAND_REG_INLINE_AC_FP16,
+  OPERAND_REG_INLINE_AC_FP32,
+  OPERAND_REG_INLINE_AC_FP64,
+  OPERAND_REG_INLINE_AC_V2INT16,
+  OPERAND_REG_INLINE_AC_V2FP16,
+  OPERAND_REG_INLINE_AC_V2INT32,
+  OPERAND_REG_INLINE_AC_V2FP32,
+
+  OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
+  OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
+
+  OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
+  OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32,
+
+  OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
+  OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32,
+
+  OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
+  OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
+
+  // Operand for source modifiers for VOP instructions
+  OPERAND_INPUT_MODS,
+
+  // Operand for SDWA instructions
+  OPERAND_SDWA_VOPC_DST
+
+};
  }
  
  // Input operand modifiers bit-masks
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

index d72fbe9b1c5747d4995a16e482f25328db59e014..d5bd71fd941be3daa57f263bbba640cca8c6ba17 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3405,6 +3405,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
    switch (OperandType) {
    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
@@ -3443,6 +3444,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
      // This suffers the same problem as the scalar 16-bit cases.
      return AMDGPU::isInlinableIntLiteralV216(Imm);
    case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
      if (isInt<16>(Imm) || isUInt<16>(Imm)) {
@@ -3836,6 +3838,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
        break;
      case AMDGPU::OPERAND_REG_IMM_INT32:
      case AMDGPU::OPERAND_REG_IMM_FP32:
+    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
        break;
      case AMDGPU::OPERAND_REG_INLINE_C_INT32:
      case AMDGPU::OPERAND_REG_INLINE_C_FP32:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td

index 919fb1819bcde76794d8f3e14ab635425b03000a..8c29437a9f1198ad834b38e2138724e16b9eee98 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1173,6 +1173,7 @@ class kimmOperand<ValueType vt> : Operand<vt> {
    let OperandType = "OPERAND_KIMM"#vt.Size;
    let PrintMethod = "printU"#vt.Size#"ImmOperand";
    let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
+  let DecoderMethod = "decodeOperand_f"#vt.Size#"kimm";
  }
  
  // 32-bit VALU immediate operand that uses the constant bus.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td

index 6d93c56ced2f33d6f8b5f7e9215e46b1821a3c49..49dbb895ef2c58db591f7989c14dc5612aa71370 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1019,6 +1019,30 @@ def VSrc_128 : RegisterOperand<VReg_128> {
    let DecoderMethod = "DecodeVS_128RegisterClass";
  }
  
+//===----------------------------------------------------------------------===//
+//  VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use
+//  with FMAMK/FMAAK
+//===----------------------------------------------------------------------===//
+
+multiclass SIRegOperand32_Deferred <string rc, string MatchName, string opType,
+                           string rc_suffix = "_32"> {
+  let OperandNamespace = "AMDGPU" in {
+    def _f16_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
+      let OperandType = opType#"_FP16_DEFERRED";
+      let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
+      let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred";
+    }
+
+    def _f32_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
+      let OperandType = opType#"_FP32_DEFERRED";
+      let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
+      let DecoderMethod = "decodeOperand_" # rc # "_32_Deferred";
+    }
+  }
+}
+
+defm VSrc : SIRegOperand32_Deferred<"VS", "VSrc", "OPERAND_REG_IMM">;
+
  //===----------------------------------------------------------------------===//
  //  VRegSrc_* Operands with a VGPR
  //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

index e68ffef548c36da9257f4782d8f6987bbd0a3072..9da7b9f5145deb5f3dfd5f4b0624e5295bb34948 100644 (file)
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1574,8 +1574,10 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
    unsigned OpType = Desc.OpInfo[OpNo].OperandType;
    switch (OpType) {
    case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

index 5bd9f85fab993de201b1d8ac4fffc0f0931923fa..aaf06125cdddb992520d5b6ca7be5c43e007f103 100644 (file)
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -789,6 +789,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
    switch (OpInfo.OperandType) {
    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -797,6 +798,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+  case AMDGPU::OPERAND_KIMM32:
+  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
      return 4;
  
    case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -808,6 +811,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td

index f7d390c5804c87181cb306968a9cdbec9e613e04..ebb0d759dd2254add655bea24b382a6077e50953 100644 (file)
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -270,12 +270,11 @@ multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
    field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
    field dag Ins32 = !if(!eq(vt.Size, 32),
-                        (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm),
-                        (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm));
+                        (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
+                        (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
+  field string Asm32 = "$vdst, $src0, $src1, $imm";
    field bit HasExt = 0;
    let IsSingle = 1;
-
-  field string Asm32 = "$vdst, $src0, $src1, $imm";
  }
  
  def VOP_MADAK_F16 : VOP_MADAK <f16>;
@@ -283,11 +282,10 @@ def VOP_MADAK_F32 : VOP_MADAK <f32>;
  
  class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
    field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
-  field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1);
+  field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1);
+  field string Asm32 = "$vdst, $src0, $imm, $src1";
    field bit HasExt = 0;
    let IsSingle = 1;
-
-  field string Asm32 = "$vdst, $src0, $imm, $src1";
  }
  
  def VOP_MADMK_F16 : VOP_MADMK <f16>;
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_err.s

index 770a3d2b019b4d56918dd0c75b3d0b549ef1a989..e8a3925a51ceabd2fdd791e545f53257bafd7cdc 100644 (file)
--- a/llvm/test/MC/AMDGPU/gfx10_asm_err.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_err.s
@@ -274,6 +274,26 @@ v_mov_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7]
  // GFX6-7: error: dpp variant of this instruction is not supported
  // GFX8-9: error: not a valid operand
  
+//===----------------------------------------------------------------------===//
+// VOP2
+//===----------------------------------------------------------------------===//
+
+v_fmaak_f32 v0, 0xff32ff, v0, 0x11213141
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
+v_fmamk_f32 v0, 0xff32ff, 0x11213141, v0
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
+v_fmaak_f32 v0, 0xff32, v0, 0x1122
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
+v_fmamk_f32 v0, 0xff32, 0x1122, v0
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
  //===----------------------------------------------------------------------===//
  // VOP2 E64.
  //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s

index c3b455c44ec95029c14d9b469be3d34d3750da81..8519e91fabf02b7a44d481d8a786022441b40a3a 100644 (file)
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
@@ -10229,9 +10229,15 @@ v_fmamk_f32 v5, v1, 0xa1b1c1d1, v3
  v_fmamk_f32 v5, v1, 0x11213141, v255
  // GFX10: encoding: [0x01,0xff,0x0b,0x58,0x41,0x31,0x21,0x11]
  
+v_fmamk_f32 v5, 0x11213141, 0x11213141, v255
+// GFX10: encoding: [0xff,0xfe,0x0b,0x58,0x41,0x31,0x21,0x11]
+
  v_fmaak_f32 v5, v1, v2, 0x11213141
  // GFX10: encoding: [0x01,0x05,0x0a,0x5a,0x41,0x31,0x21,0x11]
  
+v_fmaak_f32 v5, 0x11213141, v2, 0x11213141
+// GFX10: encoding: [0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
+
  v_fmaak_f32 v255, v1, v2, 0x11213141
  // GFX10: encoding: [0x01,0x05,0xfe,0x5b,0x41,0x31,0x21,0x11]
  
@@ -11969,6 +11975,9 @@ v_fmamk_f16 v5, v1, 0x1121, v3
  v_fmamk_f16 v255, v1, 0x1121, v3
  // GFX10: encoding: [0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00]
  
+v_fmamk_f16 v255, 0x1121, 0x1121, v3
+// GFX10: encoding: [0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00]
+
  v_fmamk_f16 v5, v255, 0x1121, v3
  // GFX10: encoding: [0xff,0x07,0x0a,0x6e,0x21,0x11,0x00,0x00]
  
@@ -12014,6 +12023,9 @@ v_fmaak_f16 v5, -4.0, v2, 0x1121
  v_fmaak_f16 v5, v1, v255, 0x1121
  // GFX10: encoding: [0x01,0xff,0x0b,0x70,0x21,0x11,0x00,0x00]
  
+v_fmaak_f16 v5, 0x1121, v255, 0x1121
+// GFX10: encoding: [0xff,0xfe,0x0b,0x70,0x21,0x11,0x00,0x00]
+
  v_fmaak_f16 v5, v1, v2, 0xa1b1
  // GFX10: encoding: [0x01,0x05,0x0a,0x70,0xb1,0xa1,0x00,0x00]
  
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s

index 9642aa06ddad980fbcbcf85896c500f2cb2b2bca..c7a5f9412e2ace7b23de32479f297591fe230b1b 100644 (file)
--- a/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s
@@ -2337,6 +2337,9 @@ v_madmk_f16 v5, v1, 0xa1b1, v3
  v_madmk_f16 v5, v1, 0x1121, v255
  // CHECK: [0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00]
  
+v_madmk_f16 v5, 0x1121, 0x1121, v255
+// CHECK: [0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00]
+
  v_madak_f16 v5, v1, v2, 0x1121
  // CHECK: [0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00]
  
@@ -2367,6 +2370,9 @@ v_madak_f16 v5, v1, v255, 0x1121
  v_madak_f16 v5, v1, v2, 0xa1b1
  // CHECK: [0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00]
  
+v_madak_f16 v5, 0x1121, v2, 0x1121
+// CHECK: [0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00]
+
  v_add_u16 v5, v1, v2
  // CHECK: [0x01,0x05,0x0a,0x4c]
  
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s

index 678079f03c526526309209c6e04307cd05bf24ab..edf397abbba29392d72482620608c04d45ec7dd9 100644 (file)
--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@@ -843,6 +843,20 @@ v_madak_f32 v0, shared_base, v0, 0x11213141
  // NOGCN: error: invalid operand (violates constant bus restrictions)
  v_madak_f32 v0, scc, v0, 0x11213141
  
+// NOGCN: error: only one literal operand is allowed
+v_madak_f32 v0, 0xff32ff, v0, 0x11213141
+
+// NOGCN: error: only one literal operand is allowed
+v_madmk_f32 v0, 0xff32ff, 0x11213141, v0
+
+// NOSICI: error: instruction not supported on this GPU
+// NOGFX89: error: only one literal operand is allowed
+v_madak_f16 v0, 0xff32, v0, 0x1122
+
+// NOSICI: error: instruction not supported on this GPU
+// NOGFX89: error: only one literal operand is allowed
+v_madmk_f16 v0, 0xff32, 0x1122, v0
+
  // NOSICIVI: error: register not available on this GPU
  // NOGFX9: error: invalid operand (violates constant bus restrictions)
  v_cmp_eq_f32 s[0:1], private_base, private_limit
diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s

index 11d4f68312ad3d3833f3f2bc56d891529790e22b..bf2c3920ce990be3328e8c861b67ddefc69cdbe8 100644 (file)
--- a/llvm/test/MC/AMDGPU/vop2.s
+++ b/llvm/test/MC/AMDGPU/vop2.s
@@ -270,6 +270,14 @@ v_madmk_f32 v1, v2, 64.0, v3
  // VI:   v_madak_f32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42]
  v_madak_f32 v1, v2, v3, 64.0
  
+// SICI: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x42,0x41,0x31,0x21,0x11]
+// VI: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11]
+v_madak_f32 v0, 0x11213141, v0, 0x11213141
+
+// SICI: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x40,0x41,0x31,0x21,0x11]
+// VI: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11]
+v_madmk_f32 v0, 0x11213141, 0x11213141, v0
+
  // SICI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
  // VI:   v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
  v_bcnt_u32_b32_e64 v1, v2, v3
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt

index 41fe8b01e6124cad2b86f117c06cace55dd37505..7ba35e0ba50f88d4c23266a8dc8722b77f76b4db 100644 (file)
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
@@ -73781,6 +73781,9 @@
  # GFX10: v_fmaak_f16 v5, -1, v2, 0x1121          ; encoding: [0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00]
  0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00
  
+# GFX10: v_fmaak_f16 v5, 0x1121, v2, 0x1121      ; encoding: [0xff,0x04,0x0a,0x70,0x21,0x11,0x00,0x00]
+0xff,0x04,0x0a,0x70,0x21,0x11,0x00,0x00
+
  # GFX10: v_fmaak_f32 v5, -1, v2, 0x11213141      ; encoding: [0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
  0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11
  
@@ -73796,6 +73799,9 @@
  # GFX10: v_fmaak_f32 v5, 0, v2, 0x11213141       ; encoding: [0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
  0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11
  
+# GFX10: v_fmaak_f32 v5, 0x11213141, v2, 0x11213141 ; encoding: [0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
+0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11
+
  # GFX10: v_fmaak_f16 v5, 0.5, v2, 0x1121         ; encoding: [0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00]
  0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00
  
@@ -74150,6 +74156,9 @@
  # GFX10: v_fmamk_f16 v255, v1, 0x1121, v3        ; encoding: [0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00]
  0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00
  
+# GFX10: v_fmamk_f16 v255, 0x1121, 0x1121, v3    ; encoding: [0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00]
+0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00
+
  # GFX10: v_fmamk_f32 v255, v1, 0x11213141, v3    ; encoding: [0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11]
  0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11
  
@@ -74159,6 +74168,9 @@
  # GFX10: v_fmamk_f32 v5, -1, 0x11213141, v3      ; encoding: [0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11]
  0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11
  
+# GFX10: v_fmamk_f32 v5, 0x11213141, 0x11213141, v3 ; encoding: [0xff,0x06,0x0a,0x58,0x41,0x31,0x21,0x11]
+0xff,0x06,0x0a,0x58,0x41,0x31,0x21,0x11
+
  # GFX10: v_fmamk_f16 v5, -4.0, 0x1121, v3        ; encoding: [0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00]
  0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00
  
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt

index eee589fff1752d79e2c80172fd27702d6814e2d0..a39a25e8be46ded61d5c0fa8952dce9b5683181f 100644 (file)
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
@@ -32160,6 +32160,9 @@
  # CHECK: v_madmk_f32 v5, v1, 0x11213141, v255    ; encoding: [0x01,0xff,0x0b,0x2e,0x41,0x31,0x21,0x11]
  0x01,0xff,0x0b,0x2e,0x41,0x31,0x21,0x11
  
+# CHECK: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11]
+0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11
+
  # CHECK: v_madak_f32 v5, v1, v2, 0x11213141      ; encoding: [0x01,0x05,0x0a,0x30,0x41,0x31,0x21,0x11]
  0x01,0x05,0x0a,0x30,0x41,0x31,0x21,0x11
  
@@ -32187,6 +32190,9 @@
  # CHECK: v_madak_f32 v5, v1, v2, 0xa1b1c1d1      ; encoding: [0x01,0x05,0x0a,0x30,0xd1,0xc1,0xb1,0xa1]
  0x01,0x05,0x0a,0x30,0xd1,0xc1,0xb1,0xa1
  
+# CHECK: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11]
+0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11
+
  # CHECK: v_add_co_u32_e32 v5, vcc, v1, v2        ; encoding: [0x01,0x05,0x0a,0x32]
  0x01,0x05,0x0a,0x32
  
@@ -33783,6 +33789,9 @@
  # CHECK: v_madmk_f16 v5, v1, 0x1121, v255        ; encoding: [0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00]
  0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00
  
+# CHECK: v_madmk_f16 v5, 0x1121, 0x1121, v255    ; encoding: [0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00]
+0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00
+
  # CHECK: v_madak_f16 v5, v1, v2, 0x1121          ; encoding: [0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00]
  0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00
  
@@ -33810,6 +33819,9 @@
  # CHECK: v_madak_f16 v5, v1, v2, 0xa1b1          ; encoding: [0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00]
  0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00
  
+# CHECK: v_madak_f16 v5, 0x1121, v2, 0x1121      ; encoding: [0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00]
+0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00
+
  # CHECK: v_add_u16_e32 v5, v1, v2                ; encoding: [0x01,0x05,0x0a,0x4c]
  0x01,0x05,0x0a,0x4c
author	Joe Nash <Joseph.Nash@amd.com>
	Mon, 4 Oct 2021 14:56:30 +0000 (10:56 -0400)
committer	Joe Nash <Joseph.Nash@amd.com>
	Mon, 11 Oct 2021 17:09:54 +0000 (13:09 -0400)
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h		patch \| blob \| history
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIDefines.h		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIInstrInfo.td		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIRegisterInfo.td		patch \| blob \| history
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h		patch \| blob \| history
llvm/lib/Target/AMDGPU/VOP2Instructions.td		patch \| blob \| history
llvm/test/MC/AMDGPU/gfx10_asm_err.s		patch \| blob \| history
llvm/test/MC/AMDGPU/gfx10_asm_vop2.s		patch \| blob \| history
llvm/test/MC/AMDGPU/gfx9_asm_vop2.s		patch \| blob \| history
llvm/test/MC/AMDGPU/literals.s		patch \| blob \| history
llvm/test/MC/AMDGPU/vop2.s		patch \| blob \| history
llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt		patch \| blob \| history
llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt		patch \| blob \| history