Always split typed vertex buffer loads on GFX6 and GFX10+ to avoid any
alignment issues that trigger memory violations and eventually a GPU
hang. This can happen if the stride (static or dynamic) is unaligned
and also if the VBO offset is aligned to a scalar (e.g. stride is 8
and VBO offset is 2 for R16G16B16A16_SNORM).
The AMD Windows driver also always splits typed vertex fetches.
fossils-db (Sienna Cichlid):
Totals from 56508 (40.54% of 139391) affected shaders:
SGPRs: 2643545 -> 2664516 (+0.79%); split: -0.19%, +0.98%
VGPRs: 2007472 -> 1995408 (-0.60%); split: -0.74%, +0.13%
CodeSize: 70596372 -> 73913312 (+4.70%); split: -0.00%, +4.70%
MaxWaves: 772653 -> 774916 (+0.29%); split: +0.37%, -0.08%
Instrs: 14074162 -> 14567072 (+3.50%); split: -0.00%, +3.51%
Cycles: 69281276 -> 71253252 (+2.85%); split: -0.00%, +2.85%
VMEM: 22047039 -> 25554196 (+15.91%); split: +17.20%, -1.29%
SMEM: 4120370 -> 4360820 (+5.84%); split: +7.41%, -1.58%
VClause: 416913 -> 438361 (+5.14%); split: -1.86%, +7.01%
SClause: 536739 -> 542637 (+1.10%); split: -0.33%, +1.43%
Copies: 977194 -> 970015 (-0.73%); split: -2.43%, +1.69%
Branches: 241205 -> 241193 (-0.00%); split: -0.06%, +0.06%
PreVGPRs: 1505645 -> 1505379 (-0.02%)
This fixes GPU hangs with bin/draw-vertices from Piglit on GFX10+
with Zink.
Cc: mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7751>
bool check_vertex_fetch_size(isel_context *ctx, const ac_data_format_info *vtx_info,
unsigned offset, unsigned stride, unsigned channels)
{
- unsigned vertex_byte_size = vtx_info->chan_byte_size * channels;
if (vtx_info->chan_byte_size != 4 && channels == 3)
return false;
+
+ /* Always split typed vertex buffer loads on GFX6 and GFX10+ to avoid any
+ * alignment issues that triggers memory violations and eventually a GPU
+ * hang. This can happen if the stride (static or dynamic) is unaligned and
+ * also if the VBO offset is aligned to a scalar (eg. stride is 8 and VBO
+ * offset is 2 for R16G16B16A16_SNORM).
+ */
return (ctx->options->chip_class >= GFX7 && ctx->options->chip_class <= GFX9) ||
- (offset % vertex_byte_size == 0 && stride % vertex_byte_size == 0);
+ (channels == 1);
}
uint8_t get_fetch_data_format(isel_context *ctx, const ac_data_format_info *vtx_info,
t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
- /* Perform per-channel vertex fetch operations if unaligned
- * access are detected. Only GFX6 and GFX10 are affected.
+ /* Always split typed vertex buffer loads on GFX6 and GFX10+
+ * to avoid any alignment issues that triggers memory
+ * violations and eventually a GPU hang. This can happen if
+ * the stride (static or dynamic) is unaligned and also if the
+ * VBO offset is aligned to a scalar (eg. stride is 8 and VBO
+ * offset is 2 for R16G16B16A16_SNORM).
*/
- bool unaligned_vertex_fetches = false;
- if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10) &&
- vtx_info->chan_format != data_format &&
- ((attrib_offset % vtx_info->element_size) ||
- (attrib_stride % vtx_info->element_size)))
- unaligned_vertex_fetches = true;
-
- if (unaligned_vertex_fetches) {
+ if (ctx->ac.chip_class == GFX6 ||
+ ctx->ac.chip_class >= GFX10) {
unsigned chan_format = vtx_info->chan_format;
LLVMValueRef values[4];