info->family == CHIP_BONAIRE ||
info->family == CHIP_KABINI;
+ /* HW bug workaround with async compute dispatches when threadgroup > 4096.
+ * The workaround is to change the "threadgroup" dimension mode to "thread"
+ * dimension mode.
+ */
+ info->has_async_compute_threadgroup_bug = info->family == CHIP_ICELAND ||
+ info->family == CHIP_TONGA;
+
/* Support for GFX10.3 was added with F32_ME_FEATURE_VERSION_31 but the
* feature version wasn't bumped.
*/
bool has_two_planes_iterate256_bug;
bool has_vgt_flush_ngg_legacy_bug;
bool has_cs_regalloc_hang_bug;
+ bool has_async_compute_threadgroup_bug;
bool has_32bit_predication;
bool has_3d_cube_border_color_mipmap;
bool has_image_opcodes;
radeon_emit(cs, dispatch_initiator);
}
} else {
+ const unsigned *cs_block_size = compute_shader->info.cs.block_size;
unsigned blocks[3] = {info->blocks[0], info->blocks[1], info->blocks[2]};
unsigned offsets[3] = {info->offsets[0], info->offsets[1], info->offsets[2]};
if (info->unaligned) {
- const unsigned *cs_block_size = compute_shader->info.cs.block_size;
unsigned remainder[3];
/* If aligned, these should be an entire block size,
predicating = false;
}
+ if (cmd_buffer->device->physical_device->rad_info.has_async_compute_threadgroup_bug &&
+ cmd_buffer->qf == RADV_QUEUE_COMPUTE) {
+ for (unsigned i = 0; i < 3; i++) {
+ if (info->unaligned) {
+ /* info->blocks is already in thread dimensions for unaligned dispatches. */
+ blocks[i] = info->blocks[i];
+ } else {
+ /* Force the async compute dispatch to be in "thread" dim mode to work around a HW bug. */
+ blocks[i] *= cs_block_size[i];
+ }
+
+ dispatch_initiator |= S_00B800_USE_THREAD_DIMENSIONS(1);
+ }
+ }
+
radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, blocks[0]);
radeon_emit(cs, blocks[1]);