DFSM has never been enabled by default because it was slower.
RadeonSI is also dropping support for it because they discovered
that it's actually not efficient in practice.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10968>
enable wave32 for compute shaders (GFX10+)
``dccmsaa``
enable DCC for MSAA images
- ``dfsm``
- enable DFSM
``gewave32``
enable wave32 for vertex/tess/geometry shaders (GFX10+)
``localbos``
radeon_emit(cs, centroid_priority);
radeon_emit(cs, centroid_priority >> 32);
- /* GFX9: Flush DFSM when the AA mode changes. */
- if (cmd_buffer->device->dfsm_allowed) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
-
cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
if (old_pipeline &&
old_pipeline->graphics.binning.pa_sc_binner_cntl_0 ==
- pipeline->graphics.binning.pa_sc_binner_cntl_0 &&
- old_pipeline->graphics.binning.db_dfsm_control == pipeline->graphics.binning.db_dfsm_control)
+ pipeline->graphics.binning.pa_sc_binner_cntl_0)
return;
bool binning_flush = false;
pipeline->graphics.binning.pa_sc_binner_cntl_0 |
S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush));
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_context_reg(cmd_buffer->cs, R_028038_DB_DFSM_CONTROL,
- pipeline->graphics.binning.db_dfsm_control);
- } else {
- radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL,
- pipeline->graphics.binning.db_dfsm_control);
- }
-
cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
}
- if (cmd_buffer->device->dfsm_allowed) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
- }
-
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
}
RADV_PERFTEST_CS_WAVE_32 = 1u << 3,
RADV_PERFTEST_PS_WAVE_32 = 1u << 4,
RADV_PERFTEST_GE_WAVE_32 = 1u << 5,
- RADV_PERFTEST_DFSM = 1u << 6,
- RADV_PERFTEST_NO_SAM = 1u << 7,
- RADV_PERFTEST_SAM = 1u << 8,
+ RADV_PERFTEST_NO_SAM = 1u << 6,
+ RADV_PERFTEST_SAM = 1u << 7,
};
bool radv_init_trace(struct radv_device *device);
{"localbos", RADV_PERFTEST_LOCAL_BOS}, {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
{"bolist", RADV_PERFTEST_BO_LIST},
{"cswave32", RADV_PERFTEST_CS_WAVE_32}, {"pswave32", RADV_PERFTEST_PS_WAVE_32},
- {"gewave32", RADV_PERFTEST_GE_WAVE_32}, {"dfsm", RADV_PERFTEST_DFSM},
+ {"gewave32", RADV_PERFTEST_GE_WAVE_32},
{"nosam", RADV_PERFTEST_NO_SAM}, {"sam", RADV_PERFTEST_SAM},
{NULL, 0}};
device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
!(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
- /* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
- device->dfsm_allowed =
- device->pbb_allowed && (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
-
/* The maximum number of scratch waves. Scratch space isn't divided
* evenly between CUs. The number is only a function of the number of CUs.
* We can decrease the constant to decrease the scratch buffer size.
{
uint32_t pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
S_028C44_DISABLE_START_OF_PRIM(1);
- uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
}
pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
- pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
}
struct radv_binning_settings
struct radv_binning_settings settings =
radv_get_binning_settings(pipeline->device->physical_device);
- bool disable_start_of_prim = true;
- uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
-
- const struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
- if (pipeline->device->dfsm_allowed && ps && !ps->info.ps.can_discard &&
- !ps->info.ps.writes_memory && blend->cb_target_enabled_4bit) {
- db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_AUTO);
- disable_start_of_prim = (blend->blend_enable_4bit & blend->cb_target_enabled_4bit) != 0;
- }
-
const uint32_t pa_sc_binner_cntl_0 =
S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
S_028C44_BIN_SIZE_X(bin_size.width == 16) | S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) |
S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) |
- S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
+ S_028C44_DISABLE_START_OF_PRIM(1) |
S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1);
pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
- pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
} else
radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo);
}
radeon_set_context_reg(
ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
-
- /* GFX9: Flush DFSM when the AA mode changes. */
- if (pipeline->device->dfsm_allowed) {
- radeon_emit(ctx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(ctx_cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
}
static void
ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil,
ps->info.ps.writes_sample_mask));
-
- if (pipeline->device->dfsm_allowed) {
- /* optimise this? */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
}
static void
struct radeon_cmdbuf *empty_cs[RADV_MAX_QUEUE_FAMILIES];
bool pbb_allowed;
- bool dfsm_allowed;
uint32_t tess_offchip_block_dw_size;
uint32_t scratch_waves;
uint32_t dispatch_initiator;
struct radv_binning_state {
uint32_t pa_sc_binner_cntl_0;
- uint32_t db_dfsm_control;
};
#define SI_GS_PER_ES 128