From cbeb25ce9122bdc20b76d1a24fcf3080873a8641 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 19 May 2022 15:18:36 +0100 Subject: [PATCH] aco: make FLAT_instruction::offset signed MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_assembler.cpp | 11 +++++++---- src/amd/compiler/aco_instruction_selection.cpp | 10 ++++------ src/amd/compiler/aco_ir.cpp | 12 ++++++++++++ src/amd/compiler/aco_ir.h | 5 ++++- src/amd/compiler/aco_opcodes.py | 2 +- src/amd/compiler/aco_print_ir.cpp | 2 +- 6 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 4bccd57..e302795 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -507,16 +507,19 @@ emit_instruction(asm_context& ctx, std::vector& out, Instruction* inst FLAT_instruction& flat = instr->flatlike(); uint32_t encoding = (0b110111 << 26); encoding |= opcode << 18; - if (ctx.gfx_level <= GFX9) { - assert(flat.offset <= 0x1fff); + if (ctx.gfx_level == GFX9 || ctx.gfx_level >= GFX11) { + if (instr->isFlat()) + assert(flat.offset <= 0xfff); + else + assert(flat.offset >= -4096 && flat.offset < 4096); encoding |= flat.offset & 0x1fff; - } else if (instr->isFlat()) { + } else if (ctx.gfx_level <= GFX8 || instr->isFlat()) { /* GFX10 has a 12-bit immediate OFFSET field, * but it has a hw bug: it ignores the offset, called FlatSegmentOffsetBug */ assert(flat.offset == 0); } else { - assert(flat.offset <= 0xfff); + assert(flat.offset >= -2048 && flat.offset <= 2047); encoding |= flat.offset & 0xfff; } if (instr->isScratch()) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 790ea9a..876c15b 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -4467,12 +4467,10 @@ lower_global_address(Builder& bld, uint32_t offset_in, Temp* address_inout, uint64_t max_const_offset_plus_one = 1; /* GFX7/8/9: FLAT loads do not support constant offsets */ - if (bld.program->gfx_level >= GFX10) - max_const_offset_plus_one = - 2048; /* GLOBAL has a 11-bit signed offset field (12 bits if signed) */ - else if (bld.program->gfx_level == GFX6 || bld.program->gfx_level == GFX9) - max_const_offset_plus_one = - 4096; /* MUBUF/GLOBAL has a 12-bit unsigned offset field (13 bits if signed for GLOBAL) */ + if (bld.program->gfx_level >= GFX9) + max_const_offset_plus_one = bld.program->dev.scratch_global_offset_max; + else if (bld.program->gfx_level == GFX6) + max_const_offset_plus_one = 4096; /* MUBUF has a 12-bit unsigned offset field */ uint64_t excess_offset = const_offset - (const_offset % max_const_offset_plus_one); const_offset %= max_const_offset_plus_one; diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 45ae5be..5d325f8 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -155,6 +155,18 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info, program->family == CHIP_ARCTURUS || program->family == CHIP_ALDEBARAN) program->dev.fused_mad_mix = true; + if (program->gfx_level >= GFX11) { + program->dev.scratch_global_offset_min = -4096; + program->dev.scratch_global_offset_max = 4095; + } else if (program->gfx_level >= GFX10 || program->gfx_level == GFX8) { + program->dev.scratch_global_offset_min = -2048; + program->dev.scratch_global_offset_max = 2047; + } else if (program->gfx_level == GFX9) { + /* The minimum is actually -4096, but negative offsets are broken when SADDR is used. */ + program->dev.scratch_global_offset_min = 0; + program->dev.scratch_global_offset_max = 4095; + } + program->wgp_mode = wgp_mode; program->progress = CompilationProgress::after_isel; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 0a00de4..bc98987 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1646,7 +1646,7 @@ struct FLAT_instruction : public Instruction { bool nv : 1; bool disable_wqm : 1; /* Require an exec mask without helper invocations */ uint8_t padding0 : 2; - uint16_t offset; /* Vega/Navi only */ + int16_t offset; /* Vega/Navi only */ uint16_t padding1; }; static_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); @@ -2066,6 +2066,9 @@ struct DeviceInfo { bool fused_mad_mix = false; bool xnack_enabled = false; bool sram_ecc_enabled = false; + + int16_t scratch_global_offset_min; + int16_t scratch_global_offset_max; }; enum class CompilationProgress { diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index 509de09..64a4b39 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -158,7 +158,7 @@ class Format(Enum): return [('uint8_t', 'opsel_lo', None), ('uint8_t', 'opsel_hi', None)] elif self in [Format.FLAT, Format.GLOBAL, Format.SCRATCH]: - return [('uint16_t', 'offset', 0), + return [('int16_t', 'offset', 0), ('memory_sync_info', 'sync', 'memory_sync_info()'), ('bool', 'glc', 'false'), ('bool', 'slc', 'false'), diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index 11ede16..49f133f 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -484,7 +484,7 @@ print_instr_format_specific(const Instruction* instr, FILE* output) case Format::SCRATCH: { const FLAT_instruction& flat = instr->flatlike(); if (flat.offset) - fprintf(output, " offset:%u", flat.offset); + fprintf(output, " offset:%d", flat.offset); if (flat.glc) fprintf(output, " glc"); if (flat.dlc) -- 2.7.4