From 213491600a09aae15efa128fd6d4c46950fa4b4a Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 21 Jun 2018 09:45:19 +0200 Subject: [PATCH] intel/compiler: emit actual barriers for working-group level barriers Until now we have assumed that we could skip emitting these barriers in the general case based on empirical testing and a few assumptions detailed in a comment in the driver code; however, recent CTS tests have shown that we actually need them to produce correct behavior. Reviewed-by: Jason Ekstrand --- src/intel/compiler/brw_fs_nir.cpp | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 62ec0df..e983110 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3926,6 +3926,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier_shared: case nir_intrinsic_memory_barrier_atomic_counter: case nir_intrinsic_memory_barrier_buffer: case nir_intrinsic_memory_barrier_image: @@ -3937,29 +3939,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_group_memory_barrier: - case nir_intrinsic_memory_barrier_shared: - /* We treat these workgroup-level barriers as no-ops. This should be - * safe at present and as long as: - * - * - Memory access instructions are not subsequently reordered by the - * compiler back-end. - * - * - All threads from a given compute shader workgroup fit within a - * single subslice and therefore talk to the same HDC shared unit - * what supposedly guarantees ordering and coherency between threads - * from the same workgroup. This may change in the future when we - * start splitting workgroups across multiple subslices. 
- * - * - The context is not in fault-and-stream mode, which could cause - * memory transactions (including to SLM) prior to the barrier to be - * replayed after the barrier if a pagefault occurs. This shouldn't - * be a problem up to and including SKL because fault-and-stream is - * not usable due to hardware issues, but that's likely to change in - * the future. - */ - break; - case nir_intrinsic_shader_clock: { /* We cannot do anything if there is an event, so ignore it for now */ const fs_reg shader_clock = get_timestamp(bld); -- 2.7.4