}
bool isVSrcB16() const {
- return isVCSrcF16() || isLiteralImm(MVT::i16);
+ return isVCSrcB16() || isLiteralImm(MVT::i16);
}
bool isVSrcV2B16() const {
return isUIntN(Size, Val) || isIntN(Size, Val);
}
+static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
+ if (VT.getScalarType() == MVT::i16) {
+ // FP immediate values are broken.
+ return isInlinableIntLiteral(Val);
+ }
+
+ // f16/v2f16 operands work correctly for all values.
+ return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
+}
+
bool AMDGPUOperand::isInlinableImm(MVT type) const {
// This is a hack to enable named inline values like
return false;
if (type.getScalarSizeInBits() == 16) {
- return AMDGPU::isInlinableLiteral16(
+ return isInlineableLiteralOp16(
static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
- AsmParser->hasInv2PiInlineImm());
+ type, AsmParser->hasInv2PiInlineImm());
}
// Check if single precision literal is inlinable
}
if (type.getScalarSizeInBits() == 16) {
- return AMDGPU::isInlinableLiteral16(
+ return isInlineableLiteralOp16(
static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
- AsmParser->hasInv2PiInlineImm());
+ type, AsmParser->hasInv2PiInlineImm());
}
return AMDGPU::isInlinableLiteral32(
return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
case 2: {
const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
+ if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
+ return AMDGPU::isInlinableIntLiteral(Val);
+
if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
- OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
+ return AMDGPU::isInlinableIntLiteralV216(Val);
+
+ if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
- OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
- OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
+ OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
- } else {
- return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
- }
+
+ return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
}
default:
llvm_unreachable("invalid operand size");
printOperand(MI, OpNo, STI, O);
}
+void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ int16_t SImm = static_cast<int16_t>(Imm);
+ if (isInlinableIntLiteral(SImm))
+ O << SImm;
+ else
+ O << formatHex(static_cast<uint64_t>(Imm));
+}
+
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
int16_t SImm = static_cast<int16_t>(Imm);
- if (SImm >= -16 && SImm <= 64) {
+ if (isInlinableIntLiteral(SImm)) {
O << SImm;
return;
}
if (Op.isReg()) {
printRegOperand(Op.getReg(), O, MRI);
} else if (Op.isImm()) {
- switch (Desc.OpInfo[OpNo].OperandType) {
+ const uint8_t OpTy = Desc.OpInfo[OpNo].OperandType;
+ switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
printImmediate64(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
- case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_INT16:
+ printImmediateInt16(Op.getImm(), STI, O);
+ break;
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
printImmediate16(Op.getImm(), STI, O);
break;
printImmediate32(Op.getImm(), STI, O);
break;
}
+
+ // Deal with 16-bit FP inline immediates not working.
+ if (OpTy == AMDGPU::OPERAND_REG_IMM_V2FP16) {
+ printImmediate16(static_cast<uint16_t>(Op.getImm()), STI, O);
+ break;
+ }
LLVM_FALLTHROUGH;
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ printImmediateInt16(static_cast<uint16_t>(Op.getImm()), STI, O);
+ break;
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
printImmediateV216(Op.getImm(), STI, O);
break;
case MCOI::OPERAND_UNKNOWN:
raw_ostream &O);
void printVINTRPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediateIntV216(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
return 0;
}
+static uint32_t getLit16IntEncoding(uint16_t Val, const MCSubtargetInfo &STI) {
+ uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
+ return IntImm == 0 ? 255 : IntImm;
+}
+
static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
if (IntImm != 0)
return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
// FIXME Is this correct? What do inline immediates do on SI for f16 src
// which does not have f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
-
case AMDGPU::OPERAND_REG_IMM_V2INT16:
- case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16: {
if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
+ if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
+ return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
LLVM_FALLTHROUGH;
+ }
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
uint16_t Lo16 = static_cast<uint16_t>(Imm);
uint32_t Encoding = getLit16Encoding(Lo16, STI);
return AMDGPU::isInlinableLiteral64(MO.getImm(),
ST.hasInv2PiInlineImm());
case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ // We would expect inline immediates to not be concerned with an integer/fp
+ // distinction. However, in the case of 16-bit integer operations, the
+ // "floating point" values appear to not work. It seems read the low 16-bits
+ // of 32-bit immediates, which happens to always work for the integer
+ // values.
+ //
+ // See llvm bugzilla 46302.
+ //
+ // TODO: Theoretically we could use op-sel to use the high bits of the
+ // 32-bit FP values.
+ return AMDGPU::isInlinableIntLiteral(Imm);
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ // This suffers the same problem as the scalar 16-bit cases.
+ return AMDGPU::isInlinableIntLiteralV216(Imm);
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
// A few special case instructions have 16-bit operands on subtargets
return false;
}
- case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
uint32_t Trunc = static_cast<uint32_t>(Imm);
return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
return Imm < -16 && Imm >= -64;
}], NegateImm>;
-def NegSubInlineConst16 : ImmLeaf<i16, [{
+def NegSubInlineIntConst16 : ImmLeaf<i16, [{
return Imm < -16 && Imm >= -64;
}], NegateImm>;
return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
-bool isInlinableIntLiteral(int64_t Literal) {
- return Literal >= -16 && Literal <= 64;
-}
-
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
if (isInlinableIntLiteral(Literal))
return true;
return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
+bool isInlinableIntLiteralV216(int32_t Literal) {
+ int16_t Lo16 = static_cast<int16_t>(Literal);
+ if (isInt<16>(Literal) || isUInt<16>(Literal))
+ return isInlinableIntLiteral(Lo16);
+
+ int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
+ if (!(Literal & 0xffff))
+ return isInlinableIntLiteral(Hi16);
+ return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
+}
+
bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
return getOperandSize(Desc.OpInfo[OpNo]);
}
+/// Is this literal inlinable, and not one of the values intended for floating
+/// point values.
+LLVM_READNONE
+inline bool isInlinableIntLiteral(int64_t Literal) {
+ return Literal >= -16 && Literal <= 64;
+}
+
/// Is this literal inlinable
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
+LLVM_READNONE
+bool isInlinableIntLiteralV216(int32_t Literal);
+
bool isArgPassedInSGPR(const Argument *Arg);
LLVM_READONLY
}
};
-LLVM_READNONE
-bool isInlinableIntLiteral(int64_t Literal);
-
} // end namespace AMDGPU
} // end namespace llvm
// an inline immediate than -c.
// TODO: Also do for 64-bit.
def : GCNPat<
- (add i16:$src0, (i16 NegSubInlineConst16:$src1)),
- (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1)
+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
+ (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;
let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
def : GCNPat<
- (i32 (zext (add i16:$src0, (i16 NegSubInlineConst16:$src1)))),
- (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1)
+ (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
+ (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;
defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 0.5 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe1,0x01,0x00]
+; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; VI-LABEL: mul_inline_imm_0.5_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT: v_mul_lo_u16_e32 v2, 0.5, v2 ; encoding: [0xf0,0x04,0x04,0x52]
+; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x38,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 0xffffb800 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe3,0x01,0x00]
+; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xffffb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; VI-LABEL: mul_inline_imm_neg_0.5_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffb800, v2 ; encoding: [0xf1,0x04,0x04,0x52]
+; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 1.0 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe5,0x01,0x00]
+; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; VI-LABEL: mul_inline_imm_1.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT: v_mul_lo_u16_e32 v2, 1.0, v2 ; encoding: [0xf2,0x04,0x04,0x52]
+; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3c00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x3c,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 0xffffbc00 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe7,0x01,0x00]
+; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xffffbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; VI-LABEL: mul_inline_imm_neg_1.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffbc00, v2 ; encoding: [0xf3,0x04,0x04,0x52]
+; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 2.0 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xe9,0x01,0x00]
+; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; VI-LABEL: shl_inline_imm_2.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT: v_lshlrev_b16_e64 v2, v2, 2.0 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0xe9,0x01,0x00]
+; VI-NEXT: s_movk_i32 s4, 0x4000 ; encoding: [0x00,0x40,0x04,0xb0]
+; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 0xffffc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xeb,0x01,0x00]
+; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 0xffffc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; VI-LABEL: shl_inline_imm_neg_2.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT: v_lshlrev_b16_e64 v2, v2, 0xffffc000 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0xeb,0x01,0x00]
+; VI-NEXT: s_movk_i32 s4, 0xc000 ; encoding: [0x00,0xc0,0x04,0xb0]
+; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 4.0 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xed,0x01,0x00]
+; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; VI-LABEL: mul_inline_imm_4.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT: v_mul_lo_u16_e32 v2, 4.0, v2 ; encoding: [0xf6,0x04,0x04,0x52]
+; VI-NEXT: v_mul_lo_u16_e32 v2, 0x4400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x44,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 0xffffc400 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xef,0x01,0x00]
+; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xffffc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; VI-LABEL: mul_inline_imm_neg_4.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffc400, v2 ; encoding: [0xf7,0x04,0x04,0x52]
+; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 0.15915494 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xf1,0x01,0x00]
+; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; VI-LABEL: mul_inline_imm_inv2pi_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT: v_mul_lo_u16_e32 v2, 0.15915494, v2 ; encoding: [0xf8,0x04,0x04,0x52]
+; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3118, v2 ; encoding: [0xff,0x04,0x04,0x52,0x18,0x31,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
}
; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
-; GFX9: v_pk_mul_lo_u16 v0, v0, 0.5 op_sel_hi:[1,0]
+; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, v0, 0.5 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
-; GFX9: v_pk_mul_lo_u16 v0, v0, -0.5 op_sel_hi:[1,0]
+; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, v0, -0.5 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+; GFX10: v_pk_mul_lo_u16 v0, 0xb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0x00]
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
-; GFX9: v_pk_mul_lo_u16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, v0, 1.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
-; GFX9: v_pk_mul_lo_u16 v0, v0, -1.0 op_sel_hi:[1,0]
+; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, v0, -1.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+; GFX10: v_pk_mul_lo_u16 v0, 0xbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0x00]
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}shl_inline_imm_2.0_v2i16:
-; GFX9: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x40004000
+; GFX9: v_pk_lshlrev_b16 v0, v0, [[K]]
-; GFX10: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+; GFX10: v_pk_lshlrev_b16 v0, v0, 0x4000 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x40,0x00,0x00]
define <2 x i16> @shl_inline_imm_2.0_v2i16(<2 x i16> %x) {
%y = shl <2 x i16> bitcast (<2 x half> <half 2.0, half 2.0> to <2 x i16>), %x
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}shl_inline_imm_neg_2.0_v2i16:
-; GFX9: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel_hi:[1,0]
+; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc000c000
+; GFX9: v_pk_lshlrev_b16 v0, v0, [[K]]
-; GFX10: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+; GFX10: v_pk_lshlrev_b16 v0, v0, 0xc000 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc0,0x00,0x00]
define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
%y = shl <2 x i16> bitcast (<2 x half> <half -2.0, half -2.0> to <2 x i16>), %x
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
-; GFX9: v_pk_mul_lo_u16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, v0, 4.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
-; GFX9: v_pk_mul_lo_u16 v0, v0, -4.0 op_sel_hi:[1,0]
+; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, v0, -4.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+; GFX10: v_pk_mul_lo_u16 v0, 0xc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0x00]
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
-; GFX9: v_pk_mul_lo_u16 v0, v0, 0.15915494 op_sel_hi:[1,0]
+; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, v0, 0.15915494 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
ret <2 x i16> %y
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dword v0, v[0:1], off
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-NEXT: s_brev_b32 s0, 35
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_sub_i16 v0, v0, -4.0 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_i16 v0, v0, s0
; GFX9-NEXT: global_store_dword v[2:3], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-NEXT: v_add_co_u32_e64 v0, s0, s0, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_i16 v2, v3, -4.0 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v2, v3, 0xc400 op_sel:[0,1] op_sel_hi:[1,0]
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dword v0, v[0:1], off
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-NEXT: s_brev_b32 s0, 34
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_sub_i16 v0, v0, 4.0 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_i16 v0, v0, s0
; GFX9-NEXT: global_store_dword v[2:3], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-NEXT: v_add_co_u32_e64 v0, s0, s0, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_i16 v2, v3, 4.0 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v2, v3, 0x4400 op_sel:[0,1] op_sel_hi:[1,0]
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; VI-NEXT: v_mov_b32_e32 v4, 0xffffc400
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dword v0, v[0:1]
-; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT: s_movk_i32 s0, 0xc400
+; VI-NEXT: v_mov_b32_e32 v4, s0
+; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; VI-NEXT: v_add_u16_e32 v1, 0xffffc400, v0
+; VI-NEXT: v_add_u16_e32 v1, s0, v0
; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v1, v0
; VI-NEXT: flat_store_dword v[2:3], v0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dword v0, v[0:1], off
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-NEXT: s_mov_b32 s0, 0x3c003c00
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_u16 v0, v0, s0
; GFX9-NEXT: global_store_dword v[2:3], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-NEXT: v_add_co_u32_e64 v0, s0, s0, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v2, v3, 1.0 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_u16 v2, v3, 0x3c00 op_sel_hi:[1,0]
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; VI-NEXT: v_mov_b32_e32 v4, 0x4400
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dword v0, v[0:1]
-; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT: s_movk_i32 s0, 0x4400
+; VI-NEXT: v_mov_b32_e32 v4, s0
+; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; VI-NEXT: v_add_u16_e32 v1, 4.0, v0
+; VI-NEXT: v_add_u16_e32 v1, s0, v0
; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v1, v0
; VI-NEXT: flat_store_dword v[2:3], v0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dword v0, v[0:1], off
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-NEXT: s_mov_b32 s0, 0xbc00bc00
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v0, v0, -1.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_u16 v0, v0, s0
; GFX9-NEXT: global_store_dword v[2:3], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-NEXT: v_add_co_u32_e64 v0, s0, s0, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v2, v3, -1.0 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_u16 v2, v3, 0xbc00 op_sel_hi:[1,0]
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; VI-NEXT: v_mov_b32_e32 v4, 0x4000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dword v0, v[0:1]
-; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT: s_movk_i32 s0, 0x4000
+; VI-NEXT: v_mov_b32_e32 v4, s0
+; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; VI-NEXT: v_add_u16_e32 v1, 2.0, v0
+; VI-NEXT: v_add_u16_e32 v1, s0, v0
; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v1, v0
; VI-NEXT: flat_store_dword v[2:3], v0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dword v0, v[0:1], off
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-NEXT: s_mov_b32 s0, 0xc000c000
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v0, v0, -2.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_u16 v0, v0, s0
; GFX9-NEXT: global_store_dword v[2:3], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-NEXT: v_add_co_u32_e64 v0, s0, s0, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v2, v3, -2.0 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_u16 v2, v3, 0xc000 op_sel_hi:[1,0]
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; VI-NEXT: v_mov_b32_e32 v4, 0xffffc000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dword v0, v[0:1]
-; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT: s_movk_i32 s0, 0xc000
+; VI-NEXT: v_mov_b32_e32 v4, s0
+; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; VI-NEXT: v_add_u16_e32 v1, 0xffffc000, v0
+; VI-NEXT: v_add_u16_e32 v1, s0, v0
; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v1, v0
; VI-NEXT: flat_store_dword v[2:3], v0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dword v0, v[0:1], off
-; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-NEXT: s_mov_b32 s0, 0x40004000
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_u16 v0, v0, s0
; GFX9-NEXT: global_store_dword v[2:3], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-NEXT: v_add_co_u32_e64 v0, s0, s0, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v2, v3, 2.0 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_u16 v2, v3, 0x4000 op_sel_hi:[1,0]
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
+// XFAIL: *
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s
+// XFAIL: *
// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s
ds_add_u32 v1, v2 offset:65535
--- /dev/null
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9ERR %s
+
+v_cvt_f16_u16_e64 v5, 0.5
+// GFX9ERR: error: invalid literal operand
+
+v_cvt_f16_u16_e64 v5, -4.0
+// GFX9ERR: error: invalid literal operand
+
+v_add_u16_e64 v5, v1, 0.5
+// GFX9ERR: error: invalid literal operand
+
+v_add_u16_e64 v5, v1, -4.0
+// GFX9ERR: error: invalid literal operand
+
+v_cvt_f16_i16_e64 v5, 0.5
+// GFX9ERR: error: invalid literal operand
+
+v_cvt_f16_i16_e64 v5, -4.0
+// GFX9ERR: error: invalid literal operand
+
+v_add_u16_e64 v5, 0.5, v2
+// GFX9ERR: error: invalid literal operand
+
+v_add_u16_e64 v5, -4.0, v2
+// GFX9ERR: error: invalid literal operand
+
+v_subrev_u16_e64 v5, v1, 0.5
+// GFX9ERR: error: invalid literal operand
+
+v_subrev_u16_e64 v5, v1, -4.0
+// GFX9ERR: error: invalid literal operand
+// XFAIL: *
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=+d16-preserves-unused-bits -show-encoding %s | FileCheck %s
v_cvt_f16_u16_e64 v5, -1
// CHECK: [0x05,0x00,0x79,0xd1,0xc1,0x00,0x00,0x00]
-v_cvt_f16_u16_e64 v5, 0.5
-// CHECK: [0x05,0x00,0x79,0xd1,0xf0,0x00,0x00,0x00]
-
-v_cvt_f16_u16_e64 v5, -4.0
-// CHECK: [0x05,0x00,0x79,0xd1,0xf7,0x00,0x00,0x00]
-
v_cvt_f16_u16_e64 v5, v1 clamp
// CHECK: [0x05,0x80,0x79,0xd1,0x01,0x01,0x00,0x00]
v_cvt_f16_i16_e64 v5, -1
// CHECK: [0x05,0x00,0x7a,0xd1,0xc1,0x00,0x00,0x00]
-v_cvt_f16_i16_e64 v5, 0.5
-// CHECK: [0x05,0x00,0x7a,0xd1,0xf0,0x00,0x00,0x00]
-
-v_cvt_f16_i16_e64 v5, -4.0
-// CHECK: [0x05,0x00,0x7a,0xd1,0xf7,0x00,0x00,0x00]
-
v_cvt_f16_i16_e64 v5, v1 clamp
// CHECK: [0x05,0x80,0x7a,0xd1,0x01,0x01,0x00,0x00]
v_add_u16_e64 v5, -1, v2
// CHECK: [0x05,0x00,0x26,0xd1,0xc1,0x04,0x02,0x00]
-v_add_u16_e64 v5, 0.5, v2
-// CHECK: [0x05,0x00,0x26,0xd1,0xf0,0x04,0x02,0x00]
-
-v_add_u16_e64 v5, -4.0, v2
-// CHECK: [0x05,0x00,0x26,0xd1,0xf7,0x04,0x02,0x00]
-
v_add_u16_e64 v5, v1, v255
// CHECK: [0x05,0x00,0x26,0xd1,0x01,0xff,0x03,0x00]
v_add_u16_e64 v5, v1, -1
// CHECK: [0x05,0x00,0x26,0xd1,0x01,0x83,0x01,0x00]
-v_add_u16_e64 v5, v1, 0.5
-// CHECK: [0x05,0x00,0x26,0xd1,0x01,0xe1,0x01,0x00]
-
-v_add_u16_e64 v5, v1, -4.0
-// CHECK: [0x05,0x00,0x26,0xd1,0x01,0xef,0x01,0x00]
-
v_sub_u16 v5, v1, v2
// CHECK: [0x01,0x05,0x0a,0x4e]
// CHECK: [0xc1,0x04,0x0a,0x4e]
v_sub_u16 v5, 0.5, v2
-// CHECK: [0xf0,0x04,0x0a,0x4e]
+// CHECK: [0xf0,0x04,0x0a,0x4e,00,0x38,0x00,0x00]
v_sub_u16 v5, -4.0, v2
-// CHECK: [0xf7,0x04,0x0a,0x4e]
+// CHECK: [0xf7,0x04,0x0a,0x4e,0x00,0xc4,0x00,0x00]
v_sub_u16 v5, 0xfe0b, v2
// CHECK: [0xff,0x04,0x0a,0x4e,0x0b,0xfe,0x00,0x00]
v_subrev_u16_e64 v5, v1, -1
// CHECK: [0x05,0x00,0x28,0xd1,0x01,0x83,0x01,0x00]
-v_subrev_u16_e64 v5, v1, 0.5
-// CHECK: [0x05,0x00,0x28,0xd1,0x01,0xe1,0x01,0x00]
-
-v_subrev_u16_e64 v5, v1, -4.0
-// CHECK: [0x05,0x00,0x28,0xd1,0x01,0xef,0x01,0x00]
-
v_mul_lo_u16 v5, v1, v2
// CHECK: [0x01,0x05,0x0a,0x52]
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s
v_pk_add_f16 v1, -17, v2
// GFX9: error: invalid literal operand
v_pk_add_f16 v1, 0x1000ffff, v2
// GFX9: error: invalid literal operand
+
+v_pk_mad_i16 v5, 0x3c00, 0x4000, 0x4400
+// GFX9: error: invalid literal operand
+// GFX10: error: invalid literal operand
+
+v_pk_mad_i16 v5, 0x3c00, 0x4000, 2
+// GFX9: error: invalid literal operand
+// GFX10: error: invalid literal operand
+
+v_pk_mad_i16 v5, 0x3c00, 3, 2
+// GFX9: error: invalid literal operand
+// GFX10-NOT: error:
+
+v_pk_mad_i16 v5, 3, 0x3c00, 2
+// GFX9: error: invalid literal operand
+// GFX10-NOT: error:
+
+v_pk_mad_i16 v5, 3, 2, 0x3c00
+// GFX9: error: invalid literal operand
+// GFX10-NOT: error:
--- /dev/null
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s
+
+v_mad_i16 v5, v1, 4.0, v3
+// GFX10: v_mad_i16 v5, v1, 0x4400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0x44,0x00,0x00]
+
+v_mad_i16 v5, v1, 0x4400, v3
+// GFX10: v_mad_i16 v5, v1, 0x4400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0x44,0x00,0x00]
+
+v_mad_i16 v5, v1, -4.0, v3
+// GFX10: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00]
+
+v_mad_i16 v5, v1, 0xc400, v3
+// GFX10: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00]
// GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04]
v_mad_i16 v5, v1, v2, -4.0
-// GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03]
+// NOGFX9: invalid literal operand
v_mad_i16 v5, v1, v2, v3 clamp
// GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04]
// GFX9: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04]
v_mad_legacy_i16 v5, v1, v2, -4.0
-// GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03]
+// NOGFX9: invalid literal operand
v_mad_legacy_i16 v5, v1, v2, -4.0 clamp
-// GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03]
+// NOGFX9: invalid literal operand
v_mad_legacy_u16_e64 v5, 0, v2, v3
// GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04]
// GFX9: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04]
v_mad_legacy_u16 v5, v1, v2, -4.0
-// GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03]
+// NOGFX9: invalid literal operand
v_mad_legacy_u16 v5, v1, v2, -4.0 clamp
-// GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03]
+// NOGFX9: invalid literal operand
v_mad_u16_e64 v5, 0, v2, v3
// GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04]
// GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04]
v_mad_u16 v5, v1, v2, -4.0
-// GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03]
+// NOGFX9: invalid literal operand
v_mad_u16 v5, v1, v2, v3 clamp
// GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04]
// VI: v_mad_i16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0xc1,0x04,0x0e,0x04]
v_mad_i16 v5, v1, -4.0, v3
-// VI: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04]
+// NOVI: error: invalid literal operand
v_mad_i16 v5, v1, v2, 0
// VI: v_mad_i16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0x02,0x02]
// VI: v_mad_u16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x01,0x0d,0x04]
v_mad_u16 v5, v1, v2, -4.0
-// VI: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03]
+// NOVI: error: invalid literal operand
///===---------------------------------------------------------------------===//
// VOP3 with Integer Clamp
// NOSICI: error:
// NOVI: error:
-// GFX9: v_max_i16_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x60,0xf7,0x16,0x86,0x06]
+// NOGFX9: error: invalid operand for instruction
v_max_i16_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
// NOSICI: error:
// NOVI: error:
-// GFX9: v_max_i16_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x60,0xf7,0x16,0x8e,0x06]
+// NOGFX9: error: invalid operand for instruction
v_max_i16_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
// NOSICI: error:
+# XFAIL: *
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX10,W32 %s
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX10,W64 %s
+# XFAIL: *
# RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s
# CHECK: ds_add_u32 v1, v2 offset:65535 ; encoding: [0xff,0xff,0x00,0xd8,0x01,0x02,0x00,0x00]
+# XFAIL: *
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck %s
# CHECK: ds_add_u32 v1, v2 offset:65535 ; encoding: [0xff,0xff,0x00,0xd8,0x01,0x02,0x00,0x00]
# GFX10: v_pk_fma_f16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b]
0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b
-# GFX10: v_pk_mad_i16 v5, 1.0, 2.0, 4.0 ; encoding: [0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b]
+# GFX10: v_pk_mad_i16 v5, 0x3c00, 0x4000, 0x4400 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0xfe,0xfd,0x1b,0x00,0x3c,0x00,0x00]
0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b
# GFX10: v_pk_mad_u16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b]
# GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04]
0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04
-# GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03]
+# GFX9: v_mad_i16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xfe,0x03]
0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03
# GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x05,0xd2,0x01,0x05,0x0e,0x04]
# GFX9: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04]
0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04
-# GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03]
+# GFX9: v_mad_legacy_i16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xfe,0x03]
0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03
-# GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03]
+# GFX9: v_mad_legacy_i16 v5, v1, v2, 0xc400 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0xfe,0x03]
0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03
# GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04]
# GFX9: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04]
0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04
-# GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03]
+# GFX9: v_mad_legacy_u16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xfe,0x03]
0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03
-# GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03]
+# GFX9: v_mad_legacy_u16 v5, v1, v2, 0xc400 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0xfe,0x03]
0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03
# GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04]
# GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04]
0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04
-# GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03]
+# GFX9: v_mad_u16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xfe,0x03]
0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03
# GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x04,0xd2,0x01,0x05,0x0e,0x04]