From 7893040f807f2b81d03a6a19c577d6d98bcc116c Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 5 Dec 2022 00:54:14 +0100 Subject: [PATCH] radv: Add stricter space checks. The check for max_dw means that none of checks triggered reliably when we had an issue. Use a stricter reserved dw measure to increase the probability of catching issues. Adds a radeon_check_space to some places after cs_create as they previously relied on the min. cs size, but that would still trigger the checks. Part-of: --- src/amd/vulkan/radv_cs.h | 22 ++++++++++++---------- src/amd/vulkan/radv_pipeline_compute.c | 2 +- src/amd/vulkan/radv_pipeline_graphics.c | 4 ++-- src/amd/vulkan/radv_queue.c | 1 + src/amd/vulkan/radv_radeon_winsys.h | 1 + src/amd/vulkan/radv_sqtt.c | 4 ++++ src/amd/vulkan/si_cmd_buffer.c | 2 ++ src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 10 ++++++++++ 8 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h index 351d8e2..822e0b0 100644 --- a/src/amd/vulkan/radv_cs.h +++ b/src/amd/vulkan/radv_cs.h @@ -34,8 +34,10 @@ static inline unsigned radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned needed) { + assert(cs->cdw <= cs->reserved_dw); if (cs->max_dw - cs->cdw < needed) ws->cs_grow(cs, needed); + cs->reserved_dw = MAX2(cs->reserved_dw, cs->cdw + needed); return cs->cdw + needed; } @@ -43,7 +45,7 @@ static inline void radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) { assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END); - assert(cs->cdw + 2 + num <= cs->max_dw); + assert(cs->cdw + 2 + num <= cs->reserved_dw); assert(num); radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2); @@ -60,7 +62,7 @@ static inline void radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) { assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END); - assert(cs->cdw + 2 + num <= cs->max_dw); + assert(cs->cdw + 2 + num <= cs->reserved_dw); assert(num); radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); @@ -77,7 +79,7 @@ static inline void radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned idx, unsigned value) { assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END); - assert(cs->cdw + 3 <= cs->max_dw); + assert(cs->cdw + 3 <= cs->reserved_dw); radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28)); radeon_emit(cs, value); @@ -87,7 +89,7 @@ static inline void radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) { assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); - assert(cs->cdw + 2 + num <= cs->max_dw); + assert(cs->cdw + 2 + num <= cs->reserved_dw); assert(num); radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0)); radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2); @@ -105,7 +107,7 @@ radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_ unsigned reg, unsigned idx, unsigned value) { assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); - assert(cs->cdw + 3 <= cs->max_dw); + assert(cs->cdw + 3 <= cs->reserved_dw); assert(idx); unsigned opcode = PKT3_SET_SH_REG_INDEX; @@ -121,7 +123,7 @@ static inline void radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) { assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); - assert(cs->cdw + 2 + num <= cs->max_dw); + assert(cs->cdw + 2 + num <= cs->reserved_dw); assert(num); radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0)); radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); @@ -131,7 +133,7 @@ static inline void radeon_set_uconfig_reg_seq_perfctr(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) { assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); - assert(cs->cdw + 2 + num <= cs->max_dw); + assert(cs->cdw + 2 + num <= cs->reserved_dw); assert(num); radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 1)); radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); @@ -149,7 +151,7 @@ radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct ra unsigned reg, unsigned idx, unsigned value) { assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); - assert(cs->cdw + 3 <= cs->max_dw); + assert(cs->cdw + 3 <= cs->reserved_dw); assert(idx); unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX; @@ -167,7 +169,7 @@ radeon_set_perfctr_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, unsigne { struct radeon_cmdbuf *cs = cmd_buffer->cs; assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); - assert(cs->cdw + 3 <= cs->max_dw); + assert(cs->cdw + 3 <= cs->reserved_dw); /* * On GFX10, there is a bug with the ME implementation of its content addressable memory (CAM), @@ -186,7 +188,7 @@ static inline void radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) { assert(reg < CIK_UCONFIG_REG_OFFSET); - assert(cs->cdw + 6 <= cs->max_dw); + assert(cs->cdw + 6 <= cs->reserved_dw); radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF)); diff --git a/src/amd/vulkan/radv_pipeline_compute.c b/src/amd/vulkan/radv_pipeline_compute.c index 2c03dec..8538c6b 100644 --- a/src/amd/vulkan/radv_pipeline_compute.c +++ b/src/amd/vulkan/radv_pipeline_compute.c @@ -103,7 +103,7 @@ radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_ struct radv_shader *shader = pipeline->base.shaders[MESA_SHADER_COMPUTE]; struct radeon_cmdbuf *cs = &pipeline->base.cs; - cs->max_dw = pdevice->rad_info.gfx_level >= GFX10 ? 19 : 16; + cs->reserved_dw = cs->max_dw = pdevice->rad_info.gfx_level >= GFX10 ? 19 : 16; cs->buf = malloc(cs->max_dw * 4); radv_pipeline_emit_hw_cs(pdevice, cs, shader); diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index db5a516..ad90277 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -3751,8 +3751,8 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi struct radeon_cmdbuf *ctx_cs = &pipeline->base.ctx_cs; struct radeon_cmdbuf *cs = &pipeline->base.cs; - cs->max_dw = 64; - ctx_cs->max_dw = 256; + cs->reserved_dw = cs->max_dw = 64; + ctx_cs->reserved_dw = ctx_cs->max_dw = 256; cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw)); ctx_cs->buf = cs->buf + cs->max_dw; diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index f301493..935cfe0 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -1064,6 +1064,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi goto fail; } + radeon_check_space(ws, cs, 512); dest_cs[i] = cs; if (scratch_bo) diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index c269935..cbd7a0c 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -114,6 +114,7 @@ struct radeon_cmdbuf { * store and reload them between buf writes. */ uint64_t cdw; /* Number of used dwords. */ uint64_t max_dw; /* Maximum number of dwords. */ + uint64_t reserved_dw; /* Number of dwords reserved through radeon_check_space() */ uint32_t *buf; /* The base pointer of the chunk. */ }; diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index 6fb0818..a642651 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -687,6 +687,8 @@ radv_begin_sqtt(struct radv_queue *queue) if (!cs) return false; + radeon_check_space(ws, cs, 256); + switch (family) { case RADV_QUEUE_GENERAL: radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); @@ -756,6 +758,8 @@ radv_end_sqtt(struct radv_queue *queue) if (!cs) return false; + radeon_check_space(ws, cs, 256); + switch (family) { case RADV_QUEUE_GENERAL: radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 93041c6..9e1687c 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -657,6 +657,8 @@ cik_create_gfx_config(struct radv_device *device) if (!cs) return; + radeon_check_space(device->ws, cs, 512); + si_emit_graphics(device, cs); while (cs->cdw & 7) { diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 8496703..f4054cb 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -388,6 +388,7 @@ radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t min_size) cs->base.buf = (uint32_t *)cs->ib_mapped; cs->base.cdw = 0; + cs->base.reserved_dw = 0; cs->base.max_dw = ib_size / 4 - 4; } @@ -397,6 +398,8 @@ radv_amdgpu_cs_finalize(struct radeon_cmdbuf *_cs) struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs); enum amd_ip_type ip_type = cs->hw_ip; + assert(cs->base.cdw <= cs->base.reserved_dw); + uint32_t ib_pad_dw_mask = MAX2(3, cs->ws->info.ib_pad_dw_mask[ip_type]); uint32_t nop_packet = get_nop_packet(cs); @@ -442,6 +445,7 @@ radv_amdgpu_cs_reset(struct radeon_cmdbuf *_cs) { struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs); cs->base.cdw = 0; + cs->base.reserved_dw = 0; cs->status = VK_SUCCESS; for (unsigned i = 0; i < cs->num_buffers; ++i) { @@ -670,6 +674,8 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cm if (parent->base.cdw + 4 > parent->base.max_dw) radv_amdgpu_cs_grow(&parent->base, 4); + parent->base.reserved_dw = MAX2(parent->base.reserved_dw, parent->base.cdw + 4); + /* Not setting the CHAIN bit will launch an IB2. */ radeon_emit(&parent->base, PKT3(PKT3_INDIRECT_BUFFER, 2, 0)); radeon_emit(&parent->base, child->ib.ib_mc_address); @@ -686,6 +692,8 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cm if (parent->base.cdw + ib->cdw > parent->base.max_dw) radv_amdgpu_cs_grow(&parent->base, ib->cdw); + parent->base.reserved_dw = MAX2(parent->base.reserved_dw, parent->base.cdw + ib->cdw); + mapped = ws->base.buffer_map(ib->bo); if (!mapped) { parent->status = VK_ERROR_OUT_OF_HOST_MEMORY; @@ -704,6 +712,8 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cm if (parent->base.cdw + child->base.cdw > parent->base.max_dw) radv_amdgpu_cs_grow(&parent->base, child->base.cdw); + parent->base.reserved_dw = MAX2(parent->base.reserved_dw, parent->base.cdw + child->base.cdw); + memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw); parent->base.cdw += child->base.cdw; } -- 2.7.4