common/ac_gpu_info.h \
common/ac_msgpack.c \
common/ac_msgpack.h \
+ common/ac_nir.c \
common/ac_nir.h \
common/ac_nir_lower_esgs_io_to_mem.c \
common/ac_nir_lower_tess_io_to_mem.c \
--- /dev/null
+/*
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "ac_nir.h"
+
+bool
+ac_nir_lower_indirect_derefs(nir_shader *shader,
+ enum chip_class chip_class)
+{
+ bool progress = false; /* true if any NIR pass below changed the shader */
+
+ /* Lower large variables to scratch first so that we won't bloat the
+ * shader by generating large if ladders for them. We later lower
+ * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
+ */
+ NIR_PASS(progress, shader, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
+ glsl_get_natural_size_align_bytes);
+
+ /* LLVM doesn't support VGPR indexing on GFX9. */
+ bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
+
+ /* TODO: Indirect indexing of GS inputs is unimplemented.
+ *
+ * TCS and TES load inputs directly from LDS or offchip memory, so
+ * indirect indexing is trivial.
+ */
+ nir_variable_mode indirect_mask = 0; /* variable modes whose indirect derefs get lowered to if-ladders */
+ if (shader->info.stage == MESA_SHADER_GEOMETRY ||
+ (shader->info.stage != MESA_SHADER_TESS_CTRL && shader->info.stage != MESA_SHADER_TESS_EVAL &&
+ !llvm_has_working_vgpr_indexing)) {
+ indirect_mask |= nir_var_shader_in;
+ }
+ if (!llvm_has_working_vgpr_indexing && shader->info.stage != MESA_SHADER_TESS_CTRL)
+ indirect_mask |= nir_var_shader_out; /* TCS outputs live in LDS, so they are excluded */
+
+ /* TODO: We shouldn't need to do this, however LLVM isn't currently
+ * smart enough to handle indirects without causing excess spilling
+ * causing the gpu to hang.
+ *
+ * See the following thread for more details of the problem:
+ * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
+ */
+ indirect_mask |= nir_var_function_temp;
+
+ progress |= nir_lower_indirect_derefs(shader, indirect_mask, UINT32_MAX); /* UINT32_MAX: no array-size limit */
+ return progress;
+}
enum chip_class chip_class,
unsigned num_reserved_es_outputs);
+bool
+ac_nir_lower_indirect_derefs(nir_shader *shader,
+ enum chip_class chip_class);
+
#ifdef __cplusplus
}
#endif
'ac_msgpack.c',
'ac_msgpack.h',
'ac_rgp_elf_object_pack.c',
+ 'ac_nir.c',
'ac_nir.h',
'ac_nir_lower_esgs_io_to_mem.c',
'ac_nir_lower_tess_io_to_mem.c',
ralloc_free(ctx.verified_interp);
}
-bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
-{
- bool progress = false;
-
- /* Lower large variables to scratch first so that we won't bloat the
- * shader by generating large if ladders for them. We later lower
- * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
- */
- NIR_PASS(progress, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
- glsl_get_natural_size_align_bytes);
-
- /* LLVM doesn't support VGPR indexing on GFX9. */
- bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
-
- /* TODO: Indirect indexing of GS inputs is unimplemented.
- *
- * TCS and TES load inputs directly from LDS or offchip memory, so
- * indirect indexing is trivial.
- */
- nir_variable_mode indirect_mask = 0;
- if (nir->info.stage == MESA_SHADER_GEOMETRY ||
- (nir->info.stage != MESA_SHADER_TESS_CTRL && nir->info.stage != MESA_SHADER_TESS_EVAL &&
- !llvm_has_working_vgpr_indexing)) {
- indirect_mask |= nir_var_shader_in;
- }
- if (!llvm_has_working_vgpr_indexing && nir->info.stage != MESA_SHADER_TESS_CTRL)
- indirect_mask |= nir_var_shader_out;
-
- /* TODO: We shouldn't need to do this, however LLVM isn't currently
- * smart enough to handle indirects without causing excess spilling
- * causing the gpu to hang.
- *
- * See the following thread for more details of the problem:
- * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
- */
- indirect_mask |= nir_var_function_temp;
-
- progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
- return progress;
-}
-
static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
{
if (intrin->intrinsic != nir_intrinsic_store_output)
return (index * 4) + chan;
}
-bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
-
bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
#include "ac_exp_param.h"
#include "ac_llvm_util.h"
#include "ac_nir_to_llvm.h"
+#include "ac_nir.h"
#include "ac_shader_util.h"
#include "aco_interface.h"
#include "sid.h"
if (progress) {
if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
- ac_lower_indirect_derefs(ordered_shaders[i],
- pipeline->device->physical_device->rad_info.chip_class);
+ ac_nir_lower_indirect_derefs(ordered_shaders[i],
+ pipeline->device->physical_device->rad_info.chip_class);
/* remove dead writes, which can remove input loads */
nir_lower_vars_to_ssa(ordered_shaders[i]);
nir_opt_dce(ordered_shaders[i]);
}
if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
- ac_lower_indirect_derefs(ordered_shaders[i - 1],
- pipeline->device->physical_device->rad_info.chip_class);
+ ac_nir_lower_indirect_derefs(ordered_shaders[i - 1],
+ pipeline->device->physical_device->rad_info.chip_class);
}
}
}
* bloat the instruction count of the loop and cause it to be
* considered too large for unrolling.
*/
- if (ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
+ if (ac_nir_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) &&
nir->info.stage != MESA_SHADER_COMPUTE) {
/* Optimize the lowered code before the linking optimizations. */
*/
#include "ac_nir_to_llvm.h"
+#include "ac_nir.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_deref.h"
NIR_PASS(changed, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
}
- changed |= ac_lower_indirect_derefs(nir, sscreen->info.chip_class);
+ changed |= ac_nir_lower_indirect_derefs(nir, sscreen->info.chip_class);
if (changed)
si_nir_opts(sscreen, nir, false);