nir: add nir_intrinsic_image_samples_identical
[platform/upstream/mesa.git] / src/compiler/nir/nir_lower_non_uniform_access.c
/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

/* Per-handle state for one potentially non-uniform source of an instruction */
struct nu_handle {
   nir_src *src;                  /* Source in the original instruction */
   nir_ssa_def *handle;           /* Handle value or array index to make uniform */
   nir_deref_instr *parent_deref; /* Parent variable deref, if src is a deref */
   nir_ssa_def *first;            /* Handle value taken from the first active invocation */
};

/* Fills *h and returns true if the source actually needs lowering, i.e. it
 * is neither a constant nor a direct (non-array) variable dereference.
 */
static bool
nu_handle_init(struct nu_handle *h, nir_src *src)
{
   h->src = src;

   nir_deref_instr *deref = nir_src_as_deref(*src);
   if (deref) {
      if (deref->deref_type == nir_deref_type_var)
         return false;

      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      assert(parent->deref_type == nir_deref_type_var);

      assert(deref->deref_type == nir_deref_type_array);
      if (nir_src_is_const(deref->arr.index))
         return false;

      assert(deref->arr.index.is_ssa);
      h->handle = deref->arr.index.ssa;
      h->parent_deref = parent;

      return true;
   } else {
      if (nir_src_is_const(*src))
         return false;

      assert(src->is_ssa);
      h->handle = src->ssa;
      h->parent_deref = NULL;

      return true;
   }
}

/* Reads the first active invocation's handle (for the components selected by
 * the optional callback) into handle->first and returns a 1-bit value that is
 * true for invocations whose own handle matches it.
 */
static nir_ssa_def *
nu_handle_compare(const nir_lower_non_uniform_access_options *options,
                  nir_builder *b, struct nu_handle *handle)
{
   nir_component_mask_t channel_mask = ~0;
   if (options->callback)
      channel_mask = options->callback(handle->src, options->callback_data);
   channel_mask &= nir_component_mask(handle->handle->num_components);

   nir_ssa_def *channels[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < handle->handle->num_components; i++)
      channels[i] = nir_channel(b, handle->handle, i);

   handle->first = handle->handle;
   nir_ssa_def *equal_first = nir_imm_true(b);
   u_foreach_bit(i, channel_mask) {
      nir_ssa_def *first = nir_read_first_invocation(b, channels[i]);
      handle->first = nir_vector_insert_imm(b, handle->first, first, i);

      equal_first = nir_iand(b, equal_first, nir_ieq(b, first, channels[i]));
   }

   return equal_first;
}

/* Rewrites the instruction's source to use the uniform (first-invocation)
 * handle, rebuilding the array deref when the source was a deref.
 */
static void
nu_handle_rewrite(nir_builder *b, struct nu_handle *h)
{
   if (h->parent_deref) {
      /* Replicate the deref. */
      nir_deref_instr *deref =
         nir_build_deref_array(b, h->parent_deref, h->first);
      *(h->src) = nir_src_for_ssa(&deref->dest.ssa);
   } else {
      *(h->src) = nir_src_for_ssa(h->first);
   }
}

static bool
lower_non_uniform_tex_access(const nir_lower_non_uniform_access_options *options,
                             nir_builder *b, nir_tex_instr *tex)
{
   if (!tex->texture_non_uniform && !tex->sampler_non_uniform)
      return false;

   /* We can have at most one texture and one sampler handle */
   unsigned num_handles = 0;
   struct nu_handle handles[2];
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_offset:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_deref:
         if (!tex->texture_non_uniform)
            continue;
         break;

      case nir_tex_src_sampler_offset:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_deref:
         if (!tex->sampler_non_uniform)
            continue;
         break;

      default:
         continue;
      }

      assert(num_handles < ARRAY_SIZE(handles));
      if (nu_handle_init(&handles[num_handles], &tex->src[i].src))
         num_handles++;
   }

   if (num_handles == 0)
      return false;

   b->cursor = nir_instr_remove(&tex->instr);

   nir_push_loop(b);

   nir_ssa_def *all_equal_first = nir_imm_true(b);
   for (unsigned i = 0; i < num_handles; i++) {
      if (i && handles[i].handle == handles[0].handle) {
         handles[i].first = handles[0].first;
         continue;
      }

      nir_ssa_def *equal_first = nu_handle_compare(options, b, &handles[i]);
      all_equal_first = nir_iand(b, all_equal_first, equal_first);
   }

   nir_push_if(b, all_equal_first);

   for (unsigned i = 0; i < num_handles; i++)
      nu_handle_rewrite(b, &handles[i]);

   nir_builder_instr_insert(b, &tex->instr);
   nir_jump(b, nir_jump_break);

   tex->texture_non_uniform = false;
   tex->sampler_non_uniform = false;

   return true;
}

/* handle_src is the index of the intrinsic source that holds the resource
 * index, handle, or deref.
 */
static bool
lower_non_uniform_access_intrin(const nir_lower_non_uniform_access_options *options,
                                nir_builder *b, nir_intrinsic_instr *intrin,
                                unsigned handle_src)
{
   if (!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM))
      return false;

   struct nu_handle handle;
   if (!nu_handle_init(&handle, &intrin->src[handle_src]))
      return false;

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_push_loop(b);

   nir_push_if(b, nu_handle_compare(options, b, &handle));

   nu_handle_rewrite(b, &handle);

   nir_builder_instr_insert(b, &intrin->instr);
   nir_jump(b, nir_jump_break);

   nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);

   return true;
}

static bool
nir_lower_non_uniform_access_impl(nir_function_impl *impl,
                                  const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            if ((options->types & nir_lower_non_uniform_texture_access) &&
                lower_non_uniform_tex_access(options, &b, tex))
               progress = true;
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_ubo:
               if ((options->types & nir_lower_non_uniform_ubo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_load_ssbo:
            case nir_intrinsic_ssbo_atomic_add:
            case nir_intrinsic_ssbo_atomic_imin:
            case nir_intrinsic_ssbo_atomic_umin:
            case nir_intrinsic_ssbo_atomic_imax:
            case nir_intrinsic_ssbo_atomic_umax:
            case nir_intrinsic_ssbo_atomic_and:
            case nir_intrinsic_ssbo_atomic_or:
            case nir_intrinsic_ssbo_atomic_xor:
            case nir_intrinsic_ssbo_atomic_exchange:
            case nir_intrinsic_ssbo_atomic_comp_swap:
            case nir_intrinsic_ssbo_atomic_fadd:
            case nir_intrinsic_ssbo_atomic_fmin:
            case nir_intrinsic_ssbo_atomic_fmax:
            case nir_intrinsic_ssbo_atomic_fcomp_swap:
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_store_ssbo:
               /* SSBO Stores put the index in the second source */
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 1))
                  progress = true;
               break;

            case nir_intrinsic_image_load:
            case nir_intrinsic_image_sparse_load:
            case nir_intrinsic_image_store:
            case nir_intrinsic_image_atomic_add:
            case nir_intrinsic_image_atomic_imin:
            case nir_intrinsic_image_atomic_umin:
            case nir_intrinsic_image_atomic_imax:
            case nir_intrinsic_image_atomic_umax:
            case nir_intrinsic_image_atomic_and:
            case nir_intrinsic_image_atomic_or:
            case nir_intrinsic_image_atomic_xor:
            case nir_intrinsic_image_atomic_exchange:
            case nir_intrinsic_image_atomic_comp_swap:
            case nir_intrinsic_image_atomic_fadd:
            case nir_intrinsic_image_atomic_fmin:
            case nir_intrinsic_image_atomic_fmax:
            case nir_intrinsic_image_size:
            case nir_intrinsic_image_samples:
            case nir_intrinsic_image_samples_identical:
            case nir_intrinsic_bindless_image_load:
            case nir_intrinsic_bindless_image_sparse_load:
            case nir_intrinsic_bindless_image_store:
            case nir_intrinsic_bindless_image_atomic_add:
            case nir_intrinsic_bindless_image_atomic_imin:
            case nir_intrinsic_bindless_image_atomic_umin:
            case nir_intrinsic_bindless_image_atomic_imax:
            case nir_intrinsic_bindless_image_atomic_umax:
            case nir_intrinsic_bindless_image_atomic_and:
            case nir_intrinsic_bindless_image_atomic_or:
            case nir_intrinsic_bindless_image_atomic_xor:
            case nir_intrinsic_bindless_image_atomic_exchange:
            case nir_intrinsic_bindless_image_atomic_comp_swap:
            case nir_intrinsic_bindless_image_atomic_fadd:
            case nir_intrinsic_bindless_image_atomic_fmin:
            case nir_intrinsic_bindless_image_atomic_fmax:
            case nir_intrinsic_bindless_image_size:
            case nir_intrinsic_bindless_image_samples:
            case nir_intrinsic_bindless_image_samples_identical:
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_sparse_load:
            case nir_intrinsic_image_deref_store:
            case nir_intrinsic_image_deref_atomic_add:
            case nir_intrinsic_image_deref_atomic_umin:
            case nir_intrinsic_image_deref_atomic_imin:
            case nir_intrinsic_image_deref_atomic_umax:
            case nir_intrinsic_image_deref_atomic_imax:
            case nir_intrinsic_image_deref_atomic_and:
            case nir_intrinsic_image_deref_atomic_or:
            case nir_intrinsic_image_deref_atomic_xor:
            case nir_intrinsic_image_deref_atomic_exchange:
            case nir_intrinsic_image_deref_atomic_comp_swap:
            case nir_intrinsic_image_deref_atomic_fadd:
            case nir_intrinsic_image_deref_atomic_fmin:
            case nir_intrinsic_image_deref_atomic_fmax:
            case nir_intrinsic_image_deref_size:
            case nir_intrinsic_image_deref_samples:
            case nir_intrinsic_image_deref_samples_identical:
               if ((options->types & nir_lower_non_uniform_image_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            default:
               /* Nothing to do */
               break;
            }
            break;
         }

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/**
 * Lowers non-uniform resource access by using a loop
 *
 * This pass lowers non-uniform resource access by using subgroup operations
 * and a loop.  Most hardware requires that operations such as texture fetches
 * and UBO loads use a dynamically uniform (or at least subgroup uniform)
 * resource.  This pass allows for non-uniform access by placing the texture
 * instruction in a loop that looks something like this:
 *
 * loop {
 *    bool tex_eq_first = readFirstInvocationARB(texture) == texture;
 *    bool smp_eq_first = readFirstInvocationARB(sampler) == sampler;
 *    if (tex_eq_first && smp_eq_first) {
 *       res = texture(texture, sampler, ...);
 *       break;
 *    }
 * }
 *
 * Fortunately, because the instruction is immediately followed by the only
 * break in the loop, the block containing the instruction dominates the end
 * of the loop.  Therefore, it's safe to move the instruction into the loop
 * without fixing up SSA in any way.
 *
 * An illustrative usage sketch follows the function definition below.
 */
bool
nir_lower_non_uniform_access(nir_shader *shader,
                             const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl &&
          nir_lower_non_uniform_access_impl(function->impl, options))
         progress = true;
   }

   return progress;
}
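
/*
 * Example usage (an illustrative sketch, not part of this file's code): a
 * driver that wants every supported resource type lowered might invoke the
 * pass roughly like this, given an already-built nir_shader *shader.  The
 * callback shown here is hypothetical; it limits the comparison to the first
 * component of each handle, matching how nu_handle_compare() consumes the
 * returned component mask.  Leaving .callback NULL compares every component.
 * The exact callback prototype is the one declared alongside
 * nir_lower_non_uniform_access_options in nir.h.
 *
 *    static nir_component_mask_t
 *    compare_first_component_only(const nir_src *src, void *data)
 *    {
 *       return 0x1;
 *    }
 *
 *    const nir_lower_non_uniform_access_options opts = {
 *       .types = nir_lower_non_uniform_ubo_access |
 *                nir_lower_non_uniform_ssbo_access |
 *                nir_lower_non_uniform_texture_access |
 *                nir_lower_non_uniform_image_access,
 *       .callback = compare_first_component_only,
 *       .callback_data = NULL,
 *    };
 *    bool progress = nir_lower_non_uniform_access(shader, &opts);
 */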