From b4b7e605a6b2e2a9f8c5ce6df042b32f320d239a Mon Sep 17 00:00:00 2001
From: Joe Nash <Joseph.Nash@amd.com>
Date: Mon, 4 Oct 2021 10:56:30 -0400
Subject: [PATCH] [AMDGPU] Support shared literals in FMAMK/FMAAK

These instructions should allow src0 to be a literal that has the same
value as the instruction's mandatory literal operand. Enable this by
introducing a deferred operand type: when decoding, its value is not
added to the MI until the mandatory literal has been parsed.

Reviewed By: dp, foad

Differential Revision: https://reviews.llvm.org/D111067

Change-Id: I22b0ae0d35bad17b6f976808e48bffe9a6af70b7
---
 .../Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp    |  46 +++++---
 .../AMDGPU/Disassembler/AMDGPUDisassembler.cpp     |  76 ++++++++++++-
 .../AMDGPU/Disassembler/AMDGPUDisassembler.h       |   5 +-
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp      |   2 +
 .../Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp |  13 ++-
 llvm/lib/Target/AMDGPU/SIDefines.h                 | 119 +++++++++++----------
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp             |   3 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td              |   1 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td           |  24 +++++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |   2 +
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h      |   4 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td         |  12 +--
 llvm/test/MC/AMDGPU/gfx10_asm_err.s                |  20 ++++
 llvm/test/MC/AMDGPU/gfx10_asm_vop2.s               |  12 +++
 llvm/test/MC/AMDGPU/gfx9_asm_vop2.s                |   6 ++
 llvm/test/MC/AMDGPU/literals.s                     |  14 +++
 llvm/test/MC/AMDGPU/vop2.s                         |   8 ++
 .../test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt |  12 +++
 llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt |  12 +++
 19 files changed, 309 insertions(+), 82 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 856bb26..4acd77a 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1542,7 +1542,7 @@ private:
   bool validateOpSel(const MCInst &Inst);
   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
   bool validateVccOperand(unsigned Reg) const;
-  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
+  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
   bool validateAGPRLdSt(const MCInst &Inst) const;
   bool validateVGPRAlign(const MCInst &Inst) const;
@@ -1715,6 +1715,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   switch (OperandType) {
   case AMDGPU::OPERAND_REG_IMM_INT32:
   case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -1723,6 +1724,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   case AMDGPU::OPERAND_REG_IMM_V2FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
   case AMDGPU::OPERAND_REG_IMM_V2INT32:
+  case AMDGPU::OPERAND_KIMM32:
     return &APFloat::IEEEsingle();
   case AMDGPU::OPERAND_REG_IMM_INT64:
   case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -1732,6 +1734,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
     return &APFloat::IEEEdouble();
   case AMDGPU::OPERAND_REG_IMM_INT16:
   case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
@@ -1742,6 +1745,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_KIMM16:
     return &APFloat::IEEEhalf();
   default:
     llvm_unreachable("unsupported fp type");
@@ -2017,12 +2021,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
 
     case AMDGPU::OPERAND_REG_IMM_INT32:
     case AMDGPU::OPERAND_REG_IMM_FP32:
+    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
     case AMDGPU::OPERAND_REG_IMM_INT16:
     case AMDGPU::OPERAND_REG_IMM_FP16:
+    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
@@ -2036,7 +2042,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
     case AMDGPU::OPERAND_REG_IMM_V2FP32:
     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
-    case AMDGPU::OPERAND_REG_IMM_V2INT32: {
+    case AMDGPU::OPERAND_REG_IMM_V2INT32:
+    case AMDGPU::OPERAND_KIMM32:
+    case AMDGPU::OPERAND_KIMM16: {
       bool lost;
       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
       // Convert literal to single precision
@@ -2062,6 +2070,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   switch (OpTy) {
   case AMDGPU::OPERAND_REG_IMM_INT32:
   case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -2101,6 +2110,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
 
   case AMDGPU::OPERAND_REG_IMM_INT16:
   case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
@@ -2128,6 +2138,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     Inst.addOperand(MCOperand::createImm(Val));
     return;
   }
+  case AMDGPU::OPERAND_KIMM32:
+    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
+    setImmKindNone();
+    return;
+  case AMDGPU::OPERAND_KIMM16:
+    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
+    setImmKindNone();
+    return;
   default:
     llvm_unreachable("invalid operand size");
   }
@@ -3250,7 +3268,8 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
        SIInstrFlags::SDWA)) {
     // Check special imm operands (used by madmk, etc)
     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
-      ++ConstantBusUseCount;
+      ++NumLiterals;
+      LiteralSize = 4;
     }
 
     SmallDenseSet<unsigned> SGPRsUsed;
@@ -3290,7 +3309,7 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
 
           // An instruction may use only one literal.
           // This has been validated on the previous step.
-          // See validateVOP3Literal.
+          // See validateVOPLiteral.
           // This literal may be used as more than one operand.
           // If all these operands are of the same size,
           // this literal counts as one scalar value.
@@ -3981,26 +4000,29 @@ bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
 }
 
-// VOP3 literal is only allowed in GFX10+ and only one can be used
-bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
-                                          const OperandVector &Operands) {
+// One unique literal can be used. VOP3 literal is only allowed in GFX10+
+bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
+                                         const OperandVector &Operands) {
   unsigned Opcode = Inst.getOpcode();
   const MCInstrDesc &Desc = MII.get(Opcode);
-  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
+  const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
+  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
+      ImmIdx == -1)
     return true;
 
   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
 
-  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+  const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
 
   unsigned NumExprs = 0;
   unsigned NumLiterals = 0;
   uint32_t LiteralValue;
 
   for (int OpIdx : OpIndices) {
-    if (OpIdx == -1) break;
+    if (OpIdx == -1)
+      continue;
 
     const MCOperand &MO = Inst.getOperand(OpIdx);
     if (!MO.isImm() && !MO.isExpr())
@@ -4030,7 +4052,7 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
   if (!NumLiterals)
     return true;
 
-  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
+  if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
     Error(getLitLoc(Operands), "literal operands are not supported");
     return false;
   }
@@ -4202,7 +4224,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
       "only one literal operand is allowed");
     return false;
   }
-  if (!validateVOP3Literal(Inst, Operands)) {
+  if (!validateVOPLiteral(Inst, Operands)) {
     return false;
   }
   if (!validateConstantBusLimitations(Inst, Operands)) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index d702b1a..e2186d4 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -26,6 +26,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/TargetRegistry.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/AMDHSAKernelDescriptor.h"
 
 using namespace llvm;
@@ -264,6 +265,34 @@ static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst,
   return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
 }
 
+static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
+                                          uint64_t Addr, const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
+}
+
+static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
+                                          uint64_t Addr, const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
+}
+
+static DecodeStatus decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm,
+                                                 uint64_t Addr,
+                                                 const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(
+      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
+}
+
+static DecodeStatus decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm,
+                                                 uint64_t Addr,
+                                                 const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(
+      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
+}
+
 static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                           const MCRegisterInfo *MRI) {
   if (OpIdx < 0)
@@ -626,6 +655,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     }
   }
 
+  int ImmLitIdx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
+  if (Res && ImmLitIdx != -1)
+    Res = convertFMAanyK(MI, ImmLitIdx);
+
   // if the opcode was not recognized we'll assume a Size of 4 bytes
   // (unless there are fewer bytes left)
   Size = Res ? (MaxInstBytesNum - Bytes.size())
@@ -810,6 +844,24 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
   return MCDisassembler::Success;
 }
 
+DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
+                                                int ImmLitIdx) const {
+  assert(HasLiteral && "Should have decoded a literal");
+  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+  unsigned DescNumOps = Desc.getNumOperands();
+  assert(DescNumOps == MI.getNumOperands());
+  for (unsigned I = 0; I < DescNumOps; ++I) {
+    auto &Op = MI.getOperand(I);
+    auto OpType = Desc.OpInfo[I].OperandType;
+    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
+                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
+    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
+        IsDeferredOp)
+      Op.setImm(Literal);
+  }
+  return MCDisassembler::Success;
+}
+
 const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
   return getContext().getRegisterInfo()->
     getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
@@ -1019,6 +1071,18 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
   return decodeDstOp(OPW512, Val);
 }
 
+// Decode Literals for insts which always have a literal in the encoding
+MCOperand
+AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
+  if (HasLiteral) {
+    if (Literal != Val)
+      return errOperand(Val, "More than one unique literal is illegal");
+  }
+  HasLiteral = true;
+  Literal = Val;
+  return MCOperand::createImm(Literal);
+}
+
 MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
   // For now all literal constants are supposed to be unsigned integer
   // ToDo: deal with signed/unsigned 64-bit integer constants
@@ -1232,7 +1296,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
   return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
 }
 
-MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
+MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
+                                          bool MandatoryLiteral) const {
   using namespace AMDGPU::EncValues;
 
   assert(Val < 1024); // enum10
@@ -1261,8 +1326,13 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
   if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
     return decodeFPImmed(Width, Val);
 
-  if (Val == LITERAL_CONST)
-    return decodeLiteralConstant();
+  if (Val == LITERAL_CONST) {
+    if (MandatoryLiteral)
+      // Keep a sentinel value for deferred setting
+      return MCOperand::createImm(LITERAL_CONST);
+    else
+      return decodeLiteralConstant();
+  }
 
   switch (Width) {
   case OPW32:
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index dc879ec..eea6074 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -87,6 +87,7 @@ public:
   DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
                                        raw_string_ostream &KdStream) const;
 
+  DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
   DecodeStatus convertSDWAInst(MCInst &MI) const;
   DecodeStatus convertDPP8Inst(MCInst &MI) const;
   DecodeStatus convertMIMGInst(MCInst &MI) const;
@@ -150,9 +151,11 @@ public:
 
   static MCOperand decodeIntImmed(unsigned Imm);
   static MCOperand decodeFPImmed(OpWidthTy Width, unsigned Imm);
+  MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
   MCOperand decodeLiteralConstant() const;
 
-  MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
+  MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
+                        bool MandatoryLiteral = false) const;
   MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
   MCOperand decodeSpecialReg32(unsigned Val) const;
   MCOperand decodeSpecialReg64(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index ab34020..b68b4b1 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -605,6 +605,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
     switch (OpTy) {
     case AMDGPU::OPERAND_REG_IMM_INT32:
     case AMDGPU::OPERAND_REG_IMM_FP32:
+    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -631,6 +632,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
     case AMDGPU::OPERAND_REG_IMM_FP16:
+    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
       printImmediate16(Op.getImm(), STI, O);
       break;
     case AMDGPU::OPERAND_REG_IMM_V2INT16:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index dbce4b2..4119605 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -233,6 +233,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
   switch (OpInfo.OperandType) {
   case AMDGPU::OPERAND_REG_IMM_INT32:
   case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -255,6 +256,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
     return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
   case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
     // FIXME Is this correct? What do inline immediates do on SI for f16 src
@@ -277,6 +279,9 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
     uint32_t Encoding = getLit16Encoding(Lo16, STI);
     return Encoding;
   }
+  case AMDGPU::OPERAND_KIMM32:
+  case AMDGPU::OPERAND_KIMM16:
+    return MO.getImm();
   default:
     llvm_unreachable("invalid operand size");
   }
@@ -341,7 +346,13 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
       (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
     return;
 
-  // Check for additional literals in SRC0/1/2 (Op 1/2/3)
+  // Do not emit literals from SISrc operands for insts with mandatory literals
+  int ImmLitIdx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
+  if (ImmLitIdx != -1)
+    return;
+
+  // Check for additional literals
   for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {
 
     // Check if this operand should be encoded as [SV]Src
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 247ebe3..777744f 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -139,64 +139,67 @@ enum ClassFlags : unsigned {
 }
 
 namespace AMDGPU {
-  enum OperandType : unsigned {
-    /// Operands with register or 32-bit immediate
-    OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
-    OPERAND_REG_IMM_INT64,
-    OPERAND_REG_IMM_INT16,
-    OPERAND_REG_IMM_FP32,
-    OPERAND_REG_IMM_FP64,
-    OPERAND_REG_IMM_FP16,
-    OPERAND_REG_IMM_V2FP16,
-    OPERAND_REG_IMM_V2INT16,
-    OPERAND_REG_IMM_V2INT32,
-    OPERAND_REG_IMM_V2FP32,
-
-    /// Operands with register or inline constant
-    OPERAND_REG_INLINE_C_INT16,
-    OPERAND_REG_INLINE_C_INT32,
-    OPERAND_REG_INLINE_C_INT64,
-    OPERAND_REG_INLINE_C_FP16,
-    OPERAND_REG_INLINE_C_FP32,
-    OPERAND_REG_INLINE_C_FP64,
-    OPERAND_REG_INLINE_C_V2INT16,
-    OPERAND_REG_INLINE_C_V2FP16,
-    OPERAND_REG_INLINE_C_V2INT32,
-    OPERAND_REG_INLINE_C_V2FP32,
-
-    /// Operands with an AccVGPR register or inline constant
-    OPERAND_REG_INLINE_AC_INT16,
-    OPERAND_REG_INLINE_AC_INT32,
-    OPERAND_REG_INLINE_AC_FP16,
-    OPERAND_REG_INLINE_AC_FP32,
-    OPERAND_REG_INLINE_AC_FP64,
-    OPERAND_REG_INLINE_AC_V2INT16,
-    OPERAND_REG_INLINE_AC_V2FP16,
-    OPERAND_REG_INLINE_AC_V2INT32,
-    OPERAND_REG_INLINE_AC_V2FP32,
-
-    OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
-    OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
-
-    OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
-    OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32,
-
-    OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
-    OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32,
-
-    OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
-    OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
-
-    // Operand for source modifiers for VOP instructions
-    OPERAND_INPUT_MODS,
-
-    // Operand for SDWA instructions
-    OPERAND_SDWA_VOPC_DST,
-
-    /// Operand with 32-bit immediate that uses the constant bus.
-    OPERAND_KIMM32,
-    OPERAND_KIMM16
-  };
+enum OperandType : unsigned {
+  /// Operands with register or 32-bit immediate
+  OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
+  OPERAND_REG_IMM_INT64,
+  OPERAND_REG_IMM_INT16,
+  OPERAND_REG_IMM_FP32,
+  OPERAND_REG_IMM_FP64,
+  OPERAND_REG_IMM_FP16,
+  OPERAND_REG_IMM_FP16_DEFERRED,
+  OPERAND_REG_IMM_FP32_DEFERRED,
+  OPERAND_REG_IMM_V2FP16,
+  OPERAND_REG_IMM_V2INT16,
+  OPERAND_REG_IMM_V2INT32,
+  OPERAND_REG_IMM_V2FP32,
+
+  /// Operands with register or inline constant
+  OPERAND_REG_INLINE_C_INT16,
+  OPERAND_REG_INLINE_C_INT32,
+  OPERAND_REG_INLINE_C_INT64,
+  OPERAND_REG_INLINE_C_FP16,
+  OPERAND_REG_INLINE_C_FP32,
+  OPERAND_REG_INLINE_C_FP64,
+  OPERAND_REG_INLINE_C_V2INT16,
+  OPERAND_REG_INLINE_C_V2FP16,
+  OPERAND_REG_INLINE_C_V2INT32,
+  OPERAND_REG_INLINE_C_V2FP32,
+
+  /// Operand with 32-bit immediate that uses the constant bus.
+  OPERAND_KIMM32,
+  OPERAND_KIMM16,
+
+  /// Operands with an AccVGPR register or inline constant
+  OPERAND_REG_INLINE_AC_INT16,
+  OPERAND_REG_INLINE_AC_INT32,
+  OPERAND_REG_INLINE_AC_FP16,
+  OPERAND_REG_INLINE_AC_FP32,
+  OPERAND_REG_INLINE_AC_FP64,
+  OPERAND_REG_INLINE_AC_V2INT16,
+  OPERAND_REG_INLINE_AC_V2FP16,
+  OPERAND_REG_INLINE_AC_V2INT32,
+  OPERAND_REG_INLINE_AC_V2FP32,
+
+  OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
+  OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
+
+  OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
+  OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32,
+
+  OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
+  OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32,
+
+  OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
+  OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
+
+  // Operand for source modifiers for VOP instructions
+  OPERAND_INPUT_MODS,
+
+  // Operand for SDWA instructions
+  OPERAND_SDWA_VOPC_DST
+
+};
 }
 
 // Input operand modifiers bit-masks
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d72fbe9..d5bd71f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3405,6 +3405,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
   switch (OperandType) {
   case AMDGPU::OPERAND_REG_IMM_INT32:
   case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_IMM_V2FP32:
@@ -3443,6 +3444,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
     // This suffers the same problem as the scalar 16-bit cases.
     return AMDGPU::isInlinableIntLiteralV216(Imm);
   case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
     if (isInt<16>(Imm) || isUInt<16>(Imm)) {
@@ -3836,6 +3838,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
       break;
     case AMDGPU::OPERAND_REG_IMM_INT32:
     case AMDGPU::OPERAND_REG_IMM_FP32:
+    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
       break;
     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 919fb18..8c29437 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1173,6 +1173,7 @@ class kimmOperand<ValueType vt> : Operand<vt> {
   let OperandType = "OPERAND_KIMM"#vt.Size;
   let PrintMethod = "printU"#vt.Size#"ImmOperand";
   let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
+  let DecoderMethod = "decodeOperand_f"#vt.Size#"kimm";
 }
 
 // 32-bit VALU immediate operand that uses the constant bus.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 6d93c56..49dbb89 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1020,6 +1020,30 @@ def VSrc_128 : RegisterOperand<VReg_128> {
 }
 
 //===----------------------------------------------------------------------===//
+//  VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use
+//  with FMAMK/FMAAK
+//===----------------------------------------------------------------------===//
+
+multiclass SIRegOperand32_Deferred <string rc, string MatchName, string opType,
+                           string rc_suffix = "_32"> {
+  let OperandNamespace = "AMDGPU" in {
+    def _f16_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
+      let OperandType = opType#"_FP16_DEFERRED";
+      let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
+      let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred";
+    }
+
+    def _f32_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
+      let OperandType = opType#"_FP32_DEFERRED";
+      let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
+      let DecoderMethod = "decodeOperand_" # rc # "_32_Deferred";
+    }
+  }
+}
+
+defm VSrc : SIRegOperand32_Deferred<"VS", "VSrc", "OPERAND_REG_IMM">;
+
+//===----------------------------------------------------------------------===//
 //  VRegSrc_* Operands with a VGPR
 //===----------------------------------------------------------------------===//
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index e68ffef..9da7b9f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1574,8 +1574,10 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
   switch (OpType) {
   case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 5bd9f85..aaf0612 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -789,6 +789,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
   switch (OpInfo.OperandType) {
   case AMDGPU::OPERAND_REG_IMM_INT32:
   case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -797,6 +798,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
   case AMDGPU::OPERAND_REG_IMM_V2FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+  case AMDGPU::OPERAND_KIMM32:
+  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
     return 4;
 
   case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -808,6 +811,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
 
   case AMDGPU::OPERAND_REG_IMM_INT16:
   case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index f7d390c..ebb0d75 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -270,12 +270,11 @@ multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
 class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
   field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
   field dag Ins32 = !if(!eq(vt.Size, 32),
-                        (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm),
-                        (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm));
+                        (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
+                        (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
+  field string Asm32 = "$vdst, $src0, $src1, $imm";
   field bit HasExt = 0;
   let IsSingle = 1;
-
-  field string Asm32 = "$vdst, $src0, $src1, $imm";
 }
 
 def VOP_MADAK_F16 : VOP_MADAK <f16>;
@@ -283,11 +282,10 @@ def VOP_MADAK_F32 : VOP_MADAK <f32>;
 
 class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
   field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
-  field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1);
+  field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1);
+  field string Asm32 = "$vdst, $src0, $imm, $src1";
   field bit HasExt = 0;
   let IsSingle = 1;
-
-  field string Asm32 = "$vdst, $src0, $imm, $src1";
 }
 
 def VOP_MADMK_F16 : VOP_MADMK <f16>;
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_err.s
index 770a3d2..e8a3925 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_err.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_err.s
@@ -275,6 +275,26 @@ v_mov_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7]
 // GFX8-9: error: not a valid operand
 
 //===----------------------------------------------------------------------===//
+// VOP2
+//===----------------------------------------------------------------------===//
+
+v_fmaak_f32 v0, 0xff32ff, v0, 0x11213141
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
+v_fmamk_f32 v0, 0xff32ff, 0x11213141, v0
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
+v_fmaak_f32 v0, 0xff32, v0, 0x1122
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
+v_fmamk_f32 v0, 0xff32, 0x1122, v0
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
+//===----------------------------------------------------------------------===//
 // VOP2 E64.
 //===----------------------------------------------------------------------===//
 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
index c3b455c..8519e91 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
@@ -10229,9 +10229,15 @@ v_fmamk_f32 v5, v1, 0xa1b1c1d1, v3
 v_fmamk_f32 v5, v1, 0x11213141, v255
 // GFX10: encoding: [0x01,0xff,0x0b,0x58,0x41,0x31,0x21,0x11]
 
+v_fmamk_f32 v5, 0x11213141, 0x11213141, v255
+// GFX10: encoding: [0xff,0xfe,0x0b,0x58,0x41,0x31,0x21,0x11]
+
 v_fmaak_f32 v5, v1, v2, 0x11213141
 // GFX10: encoding: [0x01,0x05,0x0a,0x5a,0x41,0x31,0x21,0x11]
 
+v_fmaak_f32 v5, 0x11213141, v2, 0x11213141
+// GFX10: encoding: [0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
+
 v_fmaak_f32 v255, v1, v2, 0x11213141
 // GFX10: encoding: [0x01,0x05,0xfe,0x5b,0x41,0x31,0x21,0x11]
 
@@ -11969,6 +11975,9 @@ v_fmamk_f16 v5, v1, 0x1121, v3
 v_fmamk_f16 v255, v1, 0x1121, v3
 // GFX10: encoding: [0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00]
 
+v_fmamk_f16 v255, 0x1121, 0x1121, v3
+// GFX10: encoding: [0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00]
+
 v_fmamk_f16 v5, v255, 0x1121, v3
 // GFX10: encoding: [0xff,0x07,0x0a,0x6e,0x21,0x11,0x00,0x00]
 
@@ -12014,6 +12023,9 @@ v_fmaak_f16 v5, -4.0, v2, 0x1121
 v_fmaak_f16 v5, v1, v255, 0x1121
 // GFX10: encoding: [0x01,0xff,0x0b,0x70,0x21,0x11,0x00,0x00]
 
+v_fmaak_f16 v5, 0x1121, v255, 0x1121
+// GFX10: encoding: [0xff,0xfe,0x0b,0x70,0x21,0x11,0x00,0x00]
+
 v_fmaak_f16 v5, v1, v2, 0xa1b1
 // GFX10: encoding: [0x01,0x05,0x0a,0x70,0xb1,0xa1,0x00,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s
index 9642aa0..c7a5f94 100644
--- a/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s
@@ -2337,6 +2337,9 @@ v_madmk_f16 v5, v1, 0xa1b1, v3
 v_madmk_f16 v5, v1, 0x1121, v255
 // CHECK: [0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00]
 
+v_madmk_f16 v5, 0x1121, 0x1121, v255
+// CHECK: [0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00]
+
 v_madak_f16 v5, v1, v2, 0x1121
 // CHECK: [0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00]
 
@@ -2367,6 +2370,9 @@ v_madak_f16 v5, v1, v255, 0x1121
 v_madak_f16 v5, v1, v2, 0xa1b1
 // CHECK: [0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00]
 
+v_madak_f16 v5, 0x1121, v2, 0x1121
+// CHECK: [0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00]
+
 v_add_u16 v5, v1, v2
 // CHECK: [0x01,0x05,0x0a,0x4c]
 
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s
index 678079f..edf397a 100644
--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@@ -843,6 +843,20 @@ v_madak_f32 v0, shared_base, v0, 0x11213141
 // NOGCN: error: invalid operand (violates constant bus restrictions)
 v_madak_f32 v0, scc, v0, 0x11213141
 
+// NOGCN: error: only one literal operand is allowed
+v_madak_f32 v0, 0xff32ff, v0, 0x11213141
+
+// NOGCN: error: only one literal operand is allowed
+v_madmk_f32 v0, 0xff32ff, 0x11213141, v0
+
+// NOSICI: error: instruction not supported on this GPU
+// NOGFX89: error: only one literal operand is allowed
+v_madak_f16 v0, 0xff32, v0, 0x1122
+
+// NOSICI: error: instruction not supported on this GPU
+// NOGFX89: error: only one literal operand is allowed
+v_madmk_f16 v0, 0xff32, 0x1122, v0
+
 // NOSICIVI: error: register not available on this GPU
 // NOGFX9: error: invalid operand (violates constant bus restrictions)
 v_cmp_eq_f32 s[0:1], private_base, private_limit
diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s
index 11d4f68..bf2c392 100644
--- a/llvm/test/MC/AMDGPU/vop2.s
+++ b/llvm/test/MC/AMDGPU/vop2.s
@@ -270,6 +270,14 @@ v_madmk_f32 v1, v2, 64.0, v3
 // VI:   v_madak_f32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42]
 v_madak_f32 v1, v2, v3, 64.0
 
+// SICI: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x42,0x41,0x31,0x21,0x11]
+// VI: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11]
+v_madak_f32 v0, 0x11213141, v0, 0x11213141
+
+// SICI: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x40,0x41,0x31,0x21,0x11]
+// VI: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11]
+v_madmk_f32 v0, 0x11213141, 0x11213141, v0
+
 // SICI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
 // VI:   v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
 v_bcnt_u32_b32_e64 v1, v2, v3
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
index 41fe8b0..7ba35e0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
@@ -73781,6 +73781,9 @@
 # GFX10: v_fmaak_f16 v5, -1, v2, 0x1121          ; encoding: [0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00]
 0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00
 
+# GFX10: v_fmaak_f16 v5, 0x1121, v2, 0x1121      ; encoding: [0xff,0x04,0x0a,0x70,0x21,0x11,0x00,0x00]
+0xff,0x04,0x0a,0x70,0x21,0x11,0x00,0x00
+
 # GFX10: v_fmaak_f32 v5, -1, v2, 0x11213141      ; encoding: [0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
 0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11
 
@@ -73796,6 +73799,9 @@
 # GFX10: v_fmaak_f32 v5, 0, v2, 0x11213141       ; encoding: [0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
 0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11
 
+# GFX10: v_fmaak_f32 v5, 0x11213141, v2, 0x11213141 ; encoding: [0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
+0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11
+
 # GFX10: v_fmaak_f16 v5, 0.5, v2, 0x1121         ; encoding: [0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00]
 0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00
 
@@ -74150,6 +74156,9 @@
 # GFX10: v_fmamk_f16 v255, v1, 0x1121, v3        ; encoding: [0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00]
 0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00
 
+# GFX10: v_fmamk_f16 v255, 0x1121, 0x1121, v3    ; encoding: [0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00]
+0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00
+
 # GFX10: v_fmamk_f32 v255, v1, 0x11213141, v3    ; encoding: [0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11]
 0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11
 
@@ -74159,6 +74168,9 @@
 # GFX10: v_fmamk_f32 v5, -1, 0x11213141, v3      ; encoding: [0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11]
 0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11
 
+# GFX10: v_fmamk_f32 v5, 0x11213141, 0x11213141, v3 ; encoding: [0xff,0x06,0x0a,0x58,0x41,0x31,0x21,0x11]
+0xff,0x06,0x0a,0x58,0x41,0x31,0x21,0x11
+
 # GFX10: v_fmamk_f16 v5, -4.0, 0x1121, v3        ; encoding: [0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00]
 0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
index eee589f..a39a25e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
@@ -32160,6 +32160,9 @@
 # CHECK: v_madmk_f32 v5, v1, 0x11213141, v255    ; encoding: [0x01,0xff,0x0b,0x2e,0x41,0x31,0x21,0x11]
 0x01,0xff,0x0b,0x2e,0x41,0x31,0x21,0x11
 
+# CHECK: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11]
+0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11
+
 # CHECK: v_madak_f32 v5, v1, v2, 0x11213141      ; encoding: [0x01,0x05,0x0a,0x30,0x41,0x31,0x21,0x11]
 0x01,0x05,0x0a,0x30,0x41,0x31,0x21,0x11
 
@@ -32187,6 +32190,9 @@
 # CHECK: v_madak_f32 v5, v1, v2, 0xa1b1c1d1      ; encoding: [0x01,0x05,0x0a,0x30,0xd1,0xc1,0xb1,0xa1]
 0x01,0x05,0x0a,0x30,0xd1,0xc1,0xb1,0xa1
 
+# CHECK: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11]
+0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11
+
 # CHECK: v_add_co_u32_e32 v5, vcc, v1, v2        ; encoding: [0x01,0x05,0x0a,0x32]
 0x01,0x05,0x0a,0x32
 
@@ -33783,6 +33789,9 @@
 # CHECK: v_madmk_f16 v5, v1, 0x1121, v255        ; encoding: [0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00]
 0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00
 
+# CHECK: v_madmk_f16 v5, 0x1121, 0x1121, v255    ; encoding: [0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00]
+0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00
+
 # CHECK: v_madak_f16 v5, v1, v2, 0x1121          ; encoding: [0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00]
 0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00
 
@@ -33810,6 +33819,9 @@
 # CHECK: v_madak_f16 v5, v1, v2, 0xa1b1          ; encoding: [0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00]
 0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00
 
+# CHECK: v_madak_f16 v5, 0x1121, v2, 0x1121      ; encoding: [0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00]
+0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00
+
 # CHECK: v_add_u16_e32 v5, v1, v2                ; encoding: [0x01,0x05,0x0a,0x4c]
 0x01,0x05,0x0a,0x4c
 
-- 
2.7.4