From 283b995097f91f340f69b229d2554e95a87fbe10 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Wed, 29 Aug 2018 07:46:09 +0000 Subject: [PATCH] AMDGPU: Fix getInstSizeInBytes Summary: Add some optional code to validate getInstSizeInBytes for emitted instructions. This flushed out some issues which are fixed by this patch: - Streamline getInstSizeInBytes - Properly define the VI readlane/writelane instruction as VOP3 - Fix the inline constant determination. Specifically, this change fixes an issue where a 32-bit value of 0xffffffff was recorded as unsigned. This is equal to -1 when restricting to a 32-bit comparison, and an inline constant can be used. Reviewers: arsenm, rampitec Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D50629 Change-Id: Id87c3b7975839da0de8156a124b0ce98c5fb47f2 llvm-svn: 340903 --- llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 20 +++++++++++++++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 31 ++++++++++------------- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 9 ------- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 19 ++++++++------ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll | 2 +- 5 files changed, 46 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 1876dc3..f6bdbf5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -301,6 +301,26 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInstLowering.lower(MI, TmpInst); EmitToStreamer(*OutStreamer, TmpInst); +#ifdef EXPENSIVE_CHECKS + // Sanity-check getInstSizeInBytes on explicitly specified CPUs (it cannot + // work correctly for the generic CPU). + // + // The isPseudo check really shouldn't be here, but unfortunately there are + // some negative lit tests that depend on being able to continue through + // here even when pseudo instructions haven't been lowered. + if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU())) { + SmallVector Fixups; + SmallVector CodeBytes; + raw_svector_ostream CodeStream(CodeBytes); + + std::unique_ptr InstEmitter(createSIMCCodeEmitter( + *STI.getInstrInfo(), *OutContext.getRegisterInfo(), OutContext)); + InstEmitter->encodeInstruction(TmpInst, CodeStream, Fixups, STI); + + assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI)); + } +#endif + if (STI.dumpCode()) { // Disassemble instruction/operands to text. DisasmLines.resize(DisasmLines.size() + 1); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index bd6c5ac..59dfb8a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2398,8 +2398,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: { int32_t Trunc = static_cast(Imm); - return Trunc == Imm && - AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); + return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); } case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_IMM_FP64: @@ -4975,12 +4974,6 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { // If we have a definitive size, we can use it. Otherwise we need to inspect // the operands to know the size. - // - // FIXME: Instructions that have a base 32-bit encoding report their size as - // 4, even though they are really 8 bytes if they have a literal operand. - if (DescSize != 0 && DescSize != 4) - return DescSize; - if (isFixedSize(MI)) return DescSize; @@ -4989,23 +4982,27 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { if (isVALU(MI) || isSALU(MI)) { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) - return 4; // No operands. + return DescSize; // No operands. if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx])) - return 8; + return DescSize + 4; int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) - return 4; + return DescSize; if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx])) - return 8; + return DescSize + 4; - return 4; - } + int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); + if (Src2Idx == -1) + return DescSize; - if (DescSize == 4) - return 4; + if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx])) + return DescSize + 4; + + return DescSize; + } switch (Opc) { case TargetOpcode::IMPLICIT_DEF: @@ -5021,7 +5018,7 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); } default: - llvm_unreachable("unable to find instruction size"); + return DescSize; } } diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 3a8dcab..089e78b9 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -716,12 +716,6 @@ class VOP2_DPP op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfil let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { -multiclass VOP32_Real_vi op> { - def _vi : - VOP2_Real(NAME), SIEncodingFamily.VI>, - VOP3e_vi(NAME).Pfl>; -} - multiclass VOP2_Real_MADK_vi op> { def _vi : VOP2_Real(NAME), SIEncodingFamily.VI>, VOP2_MADKe(NAME).Pfl>; @@ -899,9 +893,6 @@ defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>; defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; -defm V_READLANE_B32 : VOP32_Real_vi <0x289>; -defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>; - defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 26bc526..2d558a3 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -651,23 +651,23 @@ defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>; let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { multiclass VOP3_Real_vi op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3e_vi (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3e_vi (NAME).Pfl>; } multiclass VOP3be_Real_vi op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3be_vi (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3be_vi (NAME).Pfl>; } multiclass VOP3OpSel_Real_gfx9 op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3OpSel_gfx9 (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3OpSel_gfx9 (NAME).Pfl>; } multiclass VOP3Interp_Real_vi op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3Interp_vi (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3Interp_vi (NAME).Pfl>; } } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" @@ -813,6 +813,9 @@ defm V_MUL_LO_I32 : VOP3_Real_vi <0x285>; defm V_MUL_HI_U32 : VOP3_Real_vi <0x286>; defm V_MUL_HI_I32 : VOP3_Real_vi <0x287>; +defm V_READLANE_B32 : VOP3_Real_vi <0x289>; +defm V_WRITELANE_B32 : VOP3_Real_vi <0x28a>; + defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>; defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>; defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll index 361756a..1b9a762 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0 -- 2.7.4