pan/bi: Lower stores with component != 0
author: Alyssa Rosenzweig <alyssa@collabora.com>
Wed, 2 Jun 2021 14:46:57 +0000 (10:46 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 10 Jun 2021 18:06:10 +0000 (18:06 +0000)
If the shader packs multiple varyings into the same location with
different location_frac, we'll need to lower to a single varying store
that collects all of the channels together. This is not trivial during
code gen, but it is trivial to do in NIR right before codegen by relying
on nir_lower_io_to_temporaries. Since we're guaranteed all varyings will
be written exactly once, in the exit block, we can scan the shader
linearly and collect stores together in a single pass.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11123>

src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt
src/panfrost/bifrost/bifrost_compile.c

index 18517e9..d12c141 100644 (file)
@@ -14,19 +14,7 @@ dEQP-GLES31.functional.draw_indirect.draw_elements_indirect.line_strip.instanced
 dEQP-GLES31.functional.draw_indirect.random.31,Fail
 dEQP-GLES31.functional.layout_binding.image.image2d.vertex_binding_max_array,Fail
 dEQP-GLES31.functional.layout_binding.image.image3d.vertex_binding_max_array,Fail
-dEQP-GLES31.functional.separate_shader.random.22,Fail
 dEQP-GLES31.functional.separate_shader.random.23,Fail
 dEQP-GLES31.functional.separate_shader.random.35,Fail
 dEQP-GLES31.functional.separate_shader.random.68,Fail
 dEQP-GLES31.functional.separate_shader.random.79,Fail
-dEQP-GLES31.functional.separate_shader.random.80,Fail
-dEQP-GLES31.functional.separate_shader.random.89,Fail
-dEQP-GLES31.functional.draw_base_vertex.draw_elements_base_vertex.builtin_variable.vertex_id,Fail
-dEQP-GLES31.functional.draw_base_vertex.draw_elements_instanced_base_vertex.builtin_variable.vertex_id,Fail
-dEQP-GLES31.functional.draw_base_vertex.draw_range_elements_base_vertex.builtin_variable.vertex_id,Fail
-dEQP-GLES31.functional.separate_shader.interface.same_location_vertex_flat_fragment_flat,Fail
-dEQP-GLES31.functional.separate_shader.interface.same_location_vertex_smooth_fragment_centroid,Fail
-dEQP-GLES31.functional.separate_shader.interface.same_name_vertex_flat_fragment_flat,Fail
-dEQP-GLES31.functional.separate_shader.pipeline.different_constant_separate_programs_add_fragment,Fail
-dEQP-GLES31.functional.separate_shader.pipeline.same_constant_separate_programs_add_both,Fail
-dEQP-GLES31.functional.separate_shader.program_uniform.separate_programs_add_fragment,Fail
\ No newline at end of file
index a611b27..b27e45c 100644 (file)
@@ -3199,6 +3199,64 @@ bi_opt_post_ra(bi_context *ctx)
         }
 }
 
+/* Per-instruction callback for nir_shader_instructions_pass. Merges
+ * multiple store_output intrinsics targeting the same output slot (but
+ * different starting components / location_frac) into a single store with
+ * component 0 and a combined write mask, which is what the backend can
+ * consume. `data` is a hash_table_u64 mapping slot -> the last combined
+ * store seen for that slot. Correctness relies on the guarantee (from
+ * nir_lower_io_to_temporaries, per the commit message) that every output
+ * is written exactly once, in the exit block, so a single linear scan of
+ * the shader sees all stores for a slot in order.
+ */
+static bool
+bifrost_nir_lower_store_component(struct nir_builder *b,
+                nir_instr *instr, void *data)
+{
+        /* Only store_output intrinsics are of interest */
+        if (instr->type != nir_instr_type_intrinsic)
+                return false;
+
+        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+        if (intr->intrinsic != nir_intrinsic_store_output)
+                return false;
+
+        struct hash_table_u64 *slots = data;
+        unsigned component = nir_intrinsic_component(intr);
+        nir_src *slot_src = nir_get_io_offset_src(intr);
+        /* Slot key = constant IO offset + base. NOTE(review): assumes the
+         * offset source is a constant (nir_src_as_uint) — TODO confirm
+         * indirect output stores cannot reach this pass. */
+        uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr);
+
+        /* Earlier combined store to this slot, if any, and the channels it
+         * already wrote */
+        nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot);
+        unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0);
+
+        nir_ssa_def *value = intr->src[0].ssa;
+        b->cursor = nir_before_instr(&intr->instr);
+
+        /* Channels never written stay undef */
+        nir_ssa_def *undef = nir_ssa_undef(b, 1, value->bit_size);
+        nir_ssa_def *channels[4] = { undef, undef, undef, undef };
+
+        /* Copy old: pull forward the channels the previous store wrote */
+        u_foreach_bit(i, mask) {
+                assert(prev != NULL);
+                nir_ssa_def *prev_ssa = prev->src[0].ssa;
+                channels[i] = nir_channel(b, prev_ssa, i);
+        }
+
+        /* Copy new: place this store's channels at their real location,
+         * shifting its write mask up by its starting component */
+        unsigned new_mask = nir_intrinsic_write_mask(intr);
+        mask |= (new_mask << component);
+
+        u_foreach_bit(i, new_mask) {
+                assert(component + i < 4);
+                channels[component + i] = nir_channel(b, value, i);
+        }
+
+        /* Rewrite this store as the combined, component-0-based store */
+        intr->num_components = util_last_bit(mask);
+        nir_instr_rewrite_src_ssa(instr, &intr->src[0], 
+                        nir_vec(b, channels, intr->num_components));
+
+        nir_intrinsic_set_component(intr, 0);
+        nir_intrinsic_set_write_mask(intr, mask);
+
+        /* The previous partial store is now fully subsumed — drop it */
+        if (prev) {
+                _mesa_hash_table_u64_remove(slots, slot);
+                nir_instr_remove(&prev->instr);
+        }
+
+        /* Remember this store so any later store to the same slot can merge
+         * into it. Always returns false (reports no progress to the pass
+         * framework). */
+        _mesa_hash_table_u64_insert(slots, slot, intr);
+        return false;
+}
+
 /* Dead code elimination for branches at the end of a block - only one branch
  * per block is legal semantically, but unreachable jumps can be generated.
  * Likewise we can generate jumps to the terminal block which need to be
@@ -3273,6 +3331,12 @@ bifrost_compile_shader_nir(nir_shader *nir,
         if (ctx->stage == MESA_SHADER_FRAGMENT) {
                 NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out,
                                 ~0, false);
+        } else {
+                struct hash_table_u64 *stores = _mesa_hash_table_u64_create(ctx);
+                NIR_PASS_V(nir, nir_shader_instructions_pass,
+                                bifrost_nir_lower_store_component,
+                                nir_metadata_block_index |
+                                nir_metadata_dominance, stores);
         }
 
         NIR_PASS_V(nir, nir_lower_ssbo);