From 3914bd457b6e91c805a6ed9977ed984205f8acae Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Mon, 26 Jul 2021 10:47:20 +0200 Subject: [PATCH] amd/registers: fix fields conflict detection MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The existing code handled the case where the new definition of the same field was larger than the old one. This commit adds a check to handle the reverse case: the new definition is smaller than the old one, so writing through the merged macro would overwrite the adjacent fields. The affected fields are: * LGKM_CNT (in SQ_WAVE_IB_STS) * DONUT_SPLIT (in VGT_TESS_DISTRIBUTION) * HEAD_QUEUE (in GDS_GWS_RESOURCE) DONUT_SPLIT is the only one used by radeonsi/radv. Fixes: e6184b08924 ("amd/registers: scripts for processing register descriptions in JSON") Reviewed-by: Marek Olšák Part-of: --- src/amd/registers/makeregheader.py | 32 +++++++++++++++++++------------- src/amd/vulkan/si_cmd_buffer.c | 4 ++-- src/gallium/drivers/radeonsi/si_state.c | 4 ++-- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/amd/registers/makeregheader.py b/src/amd/registers/makeregheader.py index 472e6e4..770d678 100644 --- a/src/amd/registers/makeregheader.py +++ b/src/amd/registers/makeregheader.py @@ -112,6 +112,23 @@ def get_chips_comment(chips, parent=None): return ', '.join(comment) +def detect_conflict(regdb, field_in_type1, field_in_type2): + """ + Returns False if field_in_type1 and field_in_type2 can be merged + into a single field = if writing to field_in_type1 bits won't + overwrite adjacent fields in type2, and the other way around. 
+ """ + for idx, type_refs in enumerate([field_in_type1.type_refs, field_in_type2.type_refs]): + ref = field_in_type2 if idx == 0 else field_in_type1 + for type_ref in type_refs: + for field in regdb.register_type(type_ref).fields: + # If a different field in the other type starts in + # the tested field's bits[0, 1] interval + if (field.bits[0] > ref.bits[0] and + field.bits[0] <= ref.bits[1]): + return True + + return False class HeaderWriter(object): def __init__(self, regdb, guard=None): @@ -200,21 +217,10 @@ class HeaderWriter(object): if prev.bits[0] != line.bits[0]: continue - if prev.bits[1] < line.bits[1]: + if prev.bits[1] != line.bits[1]: # Current line's field extends beyond the range of prev. # Need to check for conflicts - conflict = False - for type_ref in prev.type_refs: - for field in regdb.register_type(type_ref).fields: - # The only possible conflict is for a prev field - # that starts at a higher bit. - if (field.bits[0] > line.bits[0] and - field.bits[0] <= line.bits[1]): - conflict = True - break - if conflict: - break - if conflict: + if detect_conflict(regdb, prev, line): continue prev.bits[1] = max(prev.bits[1], line.bits[1]) diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index b04ee11..bd0021e 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -503,13 +503,13 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) if (physical_device->rad_info.chip_class >= GFX9) { radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | - S_028B50_ACCUM_QUAD(24) | S_028B50_DONUT_SPLIT(24) | + S_028B50_ACCUM_QUAD(24) | S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6)); } else if (physical_device->rad_info.chip_class >= GFX8) { uint32_t vgt_tess_distribution; vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | - S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT(16); + S_028B50_ACCUM_QUAD(11) | 
S_028B50_DONUT_SPLIT_GFX81(16); if (physical_device->rad_info.family == CHIP_FIJI || physical_device->rad_info.family >= CHIP_POLARIS10) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 95aa12e..db7252e 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5330,7 +5330,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) unsigned vgt_tess_distribution; vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | - S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT(16); + S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16); /* Testing with Unigine Heaven extreme tesselation yielded best results * with TRAP_SPLIT = 3. @@ -5361,7 +5361,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) | - S_028B50_DONUT_SPLIT(24) | S_028B50_TRAP_SPLIT(6)); + S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6)); si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) | S_028C48_MAX_PRIM_PER_BATCH(1023)); -- 2.7.4