From d3b03fedd8cc689a0d1571bc1f5a5aa52371a304 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 21 Oct 2022 15:10:40 -0400 Subject: [PATCH] amd: add initial code for gfx940 Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/common/ac_debug.c | 5 +++++ src/amd/common/ac_gpu_info.c | 3 +++ src/amd/common/amd_family.c | 2 ++ src/amd/common/amd_family.h | 1 + src/amd/common/meson.build | 1 + src/amd/llvm/ac_llvm_util.c | 2 ++ src/amd/registers/makeregheader.py | 1 + src/amd/registers/parse_kernel_headers.py | 12 +++++++++++- src/gallium/drivers/radeonsi/si_compute.c | 7 +++++-- src/gallium/drivers/radeonsi/si_state.c | 5 +++++ 10 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c index 0f0aae3..bf108fc 100644 --- a/src/amd/common/ac_debug.c +++ b/src/amd/common/ac_debug.c @@ -127,6 +127,11 @@ static const struct si_reg *find_register(enum amd_gfx_level gfx_level, enum rad table_size = ARRAY_SIZE(gfx10_reg_table); break; case GFX9: + if (family == CHIP_GFX940) { + table = gfx940_reg_table; + table_size = ARRAY_SIZE(gfx940_reg_table); + break; + } table = gfx9_reg_table; table_size = ARRAY_SIZE(gfx9_reg_table); break; diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index f37c85b..7fdaa8e 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -40,9 +40,11 @@ #define AMDGPU_MI100_RANGE 0x32, 0x3C #define AMDGPU_MI200_RANGE 0x3C, 0xFF +#define AMDGPU_GFX940_RANGE 0x46, 0xFF #define ASICREV_IS_MI100(r) ASICREV_IS(r, MI100) #define ASICREV_IS_MI200(r) ASICREV_IS(r, MI200) +#define ASICREV_IS_GFX940(r) ASICREV_IS(r, GFX940) #ifdef _WIN32 #define DRM_CAP_ADDFB2_MODIFIERS 0x10 @@ -824,6 +826,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info) identify_chip(VEGA20); identify_chip(MI100); identify_chip(MI200); + identify_chip(GFX940); break; case FAMILY_RV: identify_chip(RAVEN); diff --git a/src/amd/common/amd_family.c b/src/amd/common/amd_family.c index 686b0b2..a8a227c 100644 --- a/src/amd/common/amd_family.c +++ b/src/amd/common/amd_family.c @@ -82,6 +82,8 @@ const char *ac_get_family_name(enum radeon_family family) return "MI100"; case CHIP_MI200: return "MI200"; + case CHIP_GFX940: + return "GFX940"; case CHIP_NAVI10: return "NAVI10"; case CHIP_NAVI12: diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h index 9699b8b..532c4ad 100644 --- a/src/amd/common/amd_family.h +++ b/src/amd/common/amd_family.h @@ -116,6 +116,7 @@ enum radeon_family CHIP_RENOIR, /* Ryzen 4000, 5000 */ CHIP_MI100, CHIP_MI200, + CHIP_GFX940, /* GFX10.1 (RDNA 1) */ CHIP_NAVI10, /* Radeon 5600, 5700 */ CHIP_NAVI12, /* Radeon Pro 5600M */ diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build index c4b7686..21cb4aa 100644 --- a/src/amd/common/meson.build +++ b/src/amd/common/meson.build @@ -25,6 +25,7 @@ amd_json_files = [ '../registers/gfx8.json', '../registers/gfx81.json', '../registers/gfx9.json', + '../registers/gfx940.json', '../registers/gfx10.json', '../registers/gfx103.json', '../registers/gfx11.json', diff --git a/src/amd/llvm/ac_llvm_util.c b/src/amd/llvm/ac_llvm_util.c index 4e89c46..9ac5102 100644 --- a/src/amd/llvm/ac_llvm_util.c +++ b/src/amd/llvm/ac_llvm_util.c @@ -156,6 +156,8 @@ const char *ac_get_llvm_processor_name(enum radeon_family family) return "gfx908"; case CHIP_MI200: return "gfx90a"; + case CHIP_GFX940: + return "gfx940"; case CHIP_NAVI10: return "gfx1010"; case CHIP_NAVI12: diff --git a/src/amd/registers/makeregheader.py b/src/amd/registers/makeregheader.py index db5def9..e0c9437 100644 --- a/src/amd/registers/makeregheader.py +++ b/src/amd/registers/makeregheader.py @@ -46,6 +46,7 @@ CHIPS = [ Object(name='gfx8', disambiguation='GFX8'), Object(name='gfx81', disambiguation='GFX81'), Object(name='gfx9', disambiguation='GFX9'), + Object(name='gfx940', disambiguation='GFX940'), Object(name='gfx10', disambiguation='GFX10'), Object(name='gfx103', disambiguation='GFX103'), Object(name='gfx11', disambiguation='GFX11'), diff --git a/src/amd/registers/parse_kernel_headers.py b/src/amd/registers/parse_kernel_headers.py index f3a4386..d10e475 100644 --- a/src/amd/registers/parse_kernel_headers.py +++ b/src/amd/registers/parse_kernel_headers.py @@ -36,6 +36,12 @@ gfx_levels = { 'asic_reg/gc/gc_9_2_1_sh_mask.h', 'vega10_enum.h', ], + 'gfx940': [ + [0x00002000, 0x0000A000, 0, 0, 0], # IP_BASE GC_BASE + 'asic_reg/gc/gc_9_4_3_offset.h', + 'asic_reg/gc/gc_9_4_3_sh_mask.h', + 'vega10_enum.h', + ], 'gfx10': [ [0x00001260, 0x0000A000, 0x02402C00, 0, 0], # IP_BASE GC_BASE 'asic_reg/gc/gc_10_1_0_offset.h', @@ -73,7 +79,8 @@ def register_filter(gfx_level, name, offset, already_added): umd_ranges = [0xB] # Gfx context, uconfig, and perf counter registers - umd_ranges += [0x28, 0x30, 0x31, 0x34, 0x35, 0x36, 0x37] + if gfx_level != 'gfx940': + umd_ranges += [0x28, 0x30, 0x31, 0x34, 0x35, 0x36, 0x37] # Add all registers in the 0x8000 range for gfx6 if gfx_level == 'gfx6': @@ -667,6 +674,9 @@ enums_missing = { 'gfx9': { **missing_enums_gfx9, }, + 'gfx940': { + **missing_enums_gfx9, + }, 'gfx10': { **missing_enums_gfx81plus, "DB_DFSM_CONTROL__PUNCHOUT_MODE": DB_DFSM_CONTROL__PUNCHOUT_MODE, diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 4cf6ced..5df9f82 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -762,8 +762,11 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_ unsigned dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) | S_00B800_FORCE_START_AT_000(1) | /* If the KMD allows it (there is a KMD hw register for it), - * allow launching waves out-of-order. (same as Vulkan) */ - S_00B800_ORDER_MODE(sctx->gfx_level >= GFX7) | + * allow launching waves out-of-order. (same as Vulkan) + * Not available in gfx940. + */ + S_00B800_ORDER_MODE(sctx->gfx_level >= GFX7 && + (sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) | S_00B800_CS_W32_EN(sctx->cs_shader_state.program->shader.wave_size == 32); const uint *last_block = info->last_block; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 756c27c..6e85644 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5653,6 +5653,11 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) } } + if (!sscreen->info.has_graphics && sscreen->info.family >= CHIP_GFX940) { + si_pm4_set_reg(pm4, R_00B89C_COMPUTE_TG_CHUNK_SIZE, 0); + si_pm4_set_reg(pm4, R_00B8B4_COMPUTE_PGM_RSRC3, 0); + } + if (sctx->gfx_level >= GFX9 && sctx->gfx_level < GFX11) si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, sctx->gfx_level >= GFX10 ? 0x20 : 0); -- 2.7.4