From af717d4acac81b3abef6d76123b41d96c2bc7356 Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan Date: Wed, 16 Mar 2022 17:33:02 +0530 Subject: [PATCH] [AMDGPU][MachineVerifier] Alignment check for fp32 packed math instructions The fp32 packed math instructions were introduced in gfx90a. If their vector register operands are not properly aligned, the verifier should flag them. Currently, the verifier fails to report this and the compiler ends up emitting broken assembly. This patch fixes that missed case in TII::verifyInstruction. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D121794 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 1 + .../AMDGPU/verify-gfx90a-aligned-vgprs.mir | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 8b79e1c8f087..0bf3ef52de47 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4051,6 +4051,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: + case AMDGPU::OPERAND_REG_IMM_V2FP32: break; case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: diff --git a/llvm/test/CodeGen/AMDGPU/verify-gfx90a-aligned-vgprs.mir b/llvm/test/CodeGen/AMDGPU/verify-gfx90a-aligned-vgprs.mir index 16d308bfcc7f..bc6137eceb38 100644 --- a/llvm/test/CodeGen/AMDGPU/verify-gfx90a-aligned-vgprs.mir +++ b/llvm/test/CodeGen/AMDGPU/verify-gfx90a-aligned-vgprs.mir @@ -109,6 +109,35 @@ body: | %11:areg_128_align2 = IMPLICIT_DEF DS_WRITE_B64_gfx9 %9, %10, 0, 0, implicit $exec DS_WRITE_B64_gfx9 %9, %11.sub1_sub2, 0, 0, implicit $exec + + ; Check aligned vgprs for FP32 Packed Math instructions. 
+ ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + ; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers *** + %12:vreg_64 = IMPLICIT_DEF + %13:vreg_64_align2 = IMPLICIT_DEF + %14:areg_96_align2 = IMPLICIT_DEF + $vgpr3_vgpr4 = V_PK_MOV_B32 8, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec + $vgpr0_vgpr1 = V_PK_ADD_F32 0, %12, 11, %13, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = V_PK_ADD_F32 0, %13, 11, %12, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = V_PK_ADD_F32 0, %13, 11, %14.sub1_sub2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = V_PK_ADD_F32 0, %14.sub1_sub2, 11, %13, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = V_PK_MUL_F32 0, %12, 11, %13, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = V_PK_MUL_F32 0, %13, 11, %12, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = V_PK_MUL_F32 0, %13, 11, %14.sub1_sub2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = V_PK_MUL_F32 0, %14.sub1_sub2, 11, %13, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + 
$vgpr0_vgpr1 = nofpexcept V_PK_FMA_F32 8, %12, 8, %13, 11, %14.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = nofpexcept V_PK_FMA_F32 8, %13, 8, %12, 11, %14.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = nofpexcept V_PK_FMA_F32 8, %13, 8, %13, 11, %14.sub1_sub2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ... # FIXME: Inline asm is not verified -- 2.34.1