nir_shader *s = nir_shader_clone(NULL, ir);
+ /* While graphics shaders are preprocessed at CSO create time, compute
+ * kernels are not preprocessed until they're cloned since the driver does
+ * not get ownership of the NIR from compute CSOs. Do this preprocessing now.
+ * Compute CSOs call this function during create time, so preprocessing
+ * happens at CSO create time regardless.
+ */
+ if (gl_shader_stage_is_compute(s->info.stage))
+ pan_shader_preprocess(s, dev->gpu_id);
+
struct panfrost_compile_inputs inputs = {
.debug = dbg,
.gpu_id = dev->gpu_id,
}
util_dynarray_init(&out->binary, NULL);
+ pan_shader_preprocess(s, inputs.gpu_id);
+
+ if (dev->arch <= 5 && s->info.stage == MESA_SHADER_FRAGMENT) {
+ NIR_PASS_V(s, pan_lower_framebuffer, key->fs.rt_formats,
+ pan_raw_format_mask_midgard(key->fs.rt_formats), false,
+ dev->gpu_id < 0x700);
+ }
+
screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info);
assert(req_local_mem >= out->info.wls_size);
}
static bool
-bi_lower_load_output(nir_builder *b, nir_instr *instr, void *data)
+bi_lower_load_output(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_ssa_def *conversion = nir_load_rt_conversion_pan(
b, .base = rt, .src_type = nir_intrinsic_dest_type(intr));
- /* TODO: This should be optimized/lowered by the driver */
- const struct panfrost_compile_inputs *inputs = data;
-
- if (inputs->is_blend) {
- conversion = nir_imm_int(b, inputs->blend.bifrost_blend_desc >> 32);
- } else if (inputs->bifrost.static_rt_conv) {
- conversion = nir_imm_int(b, inputs->bifrost.rt_conv[rt]);
- }
-
nir_ssa_def *lowered = nir_load_converted_output_pan(
b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest),
conversion, .dest_type = nir_intrinsic_dest_type(intr),
}
void
-bifrost_preprocess_nir(nir_shader *nir,
- const struct panfrost_compile_inputs *inputs)
+bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
{
/* Lower gl_Position pre-optimisation, but after lowering vars to ssa
* (so we don't accidentally duplicate the epilogue since mesa/st has
* (currently unconditional for Valhall), we force vec4 alignment for
* scratch access.
*/
- bool packed_tls = (inputs->gpu_id >= 0x9000);
+ bool packed_tls = (gpu_id >= 0x9000);
/* Lower large arrays to scratch and small arrays to bcsel */
NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
nir_metadata_block_index | nir_metadata_dominance, NULL);
NIR_PASS_V(nir, nir_shader_instructions_pass, bi_lower_load_output,
- nir_metadata_block_index | nir_metadata_dominance,
- (void *)inputs);
+ nir_metadata_block_index | nir_metadata_dominance, NULL);
} else if (nir->info.stage == MESA_SHADER_VERTEX) {
- if (inputs->gpu_id >= 0x9000) {
+ if (gpu_id >= 0x9000) {
NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out,
BITFIELD64_BIT(VARYING_SLOT_PSIZ), false);
}
{
bifrost_debug = debug_get_option_bifrost_debug();
- bifrost_preprocess_nir(nir, inputs);
bi_optimize_nir(nir, inputs->gpu_id, inputs->is_blend);
struct hash_table_u64 *sysval_to_id =
#include "panfrost/util/pan_ir.h"
#include "util/u_dynarray.h"
-void bifrost_preprocess_nir(nir_shader *nir,
- const struct panfrost_compile_inputs *inputs);
+void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
return res;
}
+
+/* State threaded to inline_rt_conversion through the single void *data
+ * pointer that nir_shader_instructions_pass provides. */
+struct rt_conversion_inputs {
+ const struct panfrost_device *dev;
+ enum pipe_format *formats;
+};
+
+/* Replace a load_rt_conversion_pan intrinsic with an immediate holding the
+ * high 32 bits of the internal blend descriptor for the addressed render
+ * target, baking the conversion word into the shader at compile time. */
+static bool
+inline_rt_conversion(nir_builder *b, nir_instr *instr, void *data)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_load_rt_conversion_pan)
+ return false;
+
+ /* The RT index is carried in the intrinsic base; the source type gives
+  * the register-format size used to pick the descriptor variant. */
+ struct rt_conversion_inputs *inputs = data;
+ unsigned rt = nir_intrinsic_base(intr);
+ unsigned size = nir_alu_type_get_type_size(nir_intrinsic_src_type(intr));
+ uint64_t conversion = GENX(pan_blend_get_internal_desc)(
+ inputs->dev, inputs->formats[rt], rt, size, false);
+
+ b->cursor = nir_after_instr(instr);
+ /* Only the top half of the 64-bit descriptor is the conversion word. */
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_imm_int(b, conversion >> 32));
+ return true;
+}
+
+/* Run inline_rt_conversion over the whole shader. 'formats' is indexed by
+ * the render-target base of each load_rt_conversion_pan intrinsic.
+ * Returns true if any instruction was rewritten. */
+bool
+GENX(pan_inline_rt_conversion)(nir_shader *s, const struct panfrost_device *dev,
+ enum pipe_format *formats)
+{
+ return nir_shader_instructions_pass(
+ s, inline_rt_conversion,
+ nir_metadata_block_index | nir_metadata_dominance,
+ &(struct rt_conversion_inputs){.dev = dev, .formats = formats});
+}
#endif
struct pan_blend_shader_variant *
#endif
struct pan_shader_info info;
+ pan_shader_preprocess(nir, inputs.gpu_id);
+
+#if PAN_ARCH >= 6
+ NIR_PASS_V(nir, GENX(pan_inline_rt_conversion), dev, inputs.rt_formats);
+#endif
GENX(pan_shader_compile)(nir, &inputs, &variant->binary, &info);
uint64_t GENX(pan_blend_get_internal_desc)(const struct panfrost_device *dev,
enum pipe_format fmt, unsigned rt,
unsigned force_size, bool dithered);
+
+bool GENX(pan_inline_rt_conversion)(nir_shader *s,
+ const struct panfrost_device *dev,
+ enum pipe_format *formats);
#endif
/* Take blend_shaders.lock before calling this function and release it when
for (unsigned i = 0; i < active_count; ++i)
BITSET_SET(b.shader->info.textures_used, i);
+ pan_shader_preprocess(b.shader, inputs.gpu_id);
+
if (PAN_ARCH == 4) {
NIR_PASS_V(b.shader, nir_shader_instructions_pass,
lower_sampler_parameters,
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
+ pan_shader_preprocess(b.shader, inputs.gpu_id);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader_info);
ralloc_free(b.shader);
#endif
#endif
+/* This is only needed on Midgard. It's the same on both v4 and v5, so only
+ * compile once to avoid the GenXML dependency for calls.
+ */
+#if PAN_ARCH == 5
+/* Build a bitmask of the (up to 8) render targets whose writeback format is
+ * "raw", i.e. sorts below MALI_COLOR_FORMAT_R8 in the blendable-format
+ * table. NOTE(review): this indexes panfrost_blendable_formats_v6 even
+ * though it runs on v4/v5 -- presumably the writeback column is
+ * arch-independent; confirm against the format table definition. */
+uint8_t
+pan_raw_format_mask_midgard(enum pipe_format *formats)
+{
+ uint8_t out = 0;
+
+ for (unsigned i = 0; i < 8; i++) {
+ enum pipe_format fmt = formats[i];
+ unsigned wb_fmt = panfrost_blendable_formats_v6[fmt].writeback;
+
+ if (wb_fmt < MALI_COLOR_FORMAT_R8)
+ out |= BITFIELD_BIT(i);
+ }
+
+ return out;
+}
+#endif
+
void
GENX(pan_shader_compile)(nir_shader *s, struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
#if PAN_ARCH >= 6
bifrost_compile_shader_nir(s, inputs, binary, info);
#else
- for (unsigned i = 0; i < ARRAY_SIZE(inputs->rt_formats); i++) {
- enum pipe_format fmt = inputs->rt_formats[i];
- unsigned wb_fmt = panfrost_blendable_formats_v6[fmt].writeback;
-
- if (wb_fmt < MALI_COLOR_FORMAT_R8)
- inputs->raw_fmt_mask |= BITFIELD_BIT(i);
- }
-
midgard_compile_shader_nir(s, inputs, binary, info);
#endif
struct panfrost_device;
+void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);
+void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id);
+
+/* Architecture-independent NIR preprocessing entry point: dispatch to the
+ * Bifrost/Valhall (arch >= 6) or Midgard backend preprocessor based on the
+ * GPU id. Callers run this before GENX(pan_shader_compile). */
+static inline void
+pan_shader_preprocess(nir_shader *nir, unsigned gpu_id)
+{
+ if (pan_arch(gpu_id) >= 6)
+ bifrost_preprocess_nir(nir, gpu_id);
+ else
+ midgard_preprocess_nir(nir, gpu_id);
+}
+
+uint8_t pan_raw_format_mask_midgard(enum pipe_format *formats);
+
#ifdef PAN_ARCH
const nir_shader_compiler_options *GENX(pan_shader_get_compiler_options)(void);
#include "util/u_dynarray.h"
#include "util/u_math.h"
-#include "panfrost/util/pan_lower_framebuffer.h"
#include "compiler.h"
#include "helpers.h"
#include "midgard.h"
}
void
-midgard_preprocess_nir(nir_shader *nir,
- const struct panfrost_compile_inputs *inputs)
+midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id)
{
- unsigned quirks = midgard_get_quirks(inputs->gpu_id);
+ unsigned quirks = midgard_get_quirks(gpu_id);
/* Lower gl_Position pre-optimisation, but after lowering vars to ssa
* (so we don't accidentally duplicate the epilogue since mesa/st has
NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
/* TEX_GRAD fails to apply sampler descriptor settings on some
- * implementations, requiring a lowering. However, blit shaders do not
- * use the affected settings and should skip the workaround.
+ * implementations, requiring a lowering.
*/
- if ((quirks & MIDGARD_BROKEN_LOD) && !inputs->is_blit)
+ if (quirks & MIDGARD_BROKEN_LOD)
NIR_PASS_V(nir, midgard_nir_lod_errata);
/* Midgard image ops coordinates are 16-bit instead of 32-bit */
NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL);
NIR_PASS_V(nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
NIR_PASS_V(nir, nir_lower_var_copies);
-
- if (nir->info.stage == MESA_SHADER_FRAGMENT) {
- NIR_PASS_V(nir, pan_lower_framebuffer, inputs->rt_formats,
- inputs->raw_fmt_mask, inputs->is_blend,
- quirks & MIDGARD_BROKEN_BLEND_LOADS);
- }
}
static void
ctx->ssa_constants = _mesa_hash_table_u64_create(ctx);
- midgard_preprocess_nir(nir, inputs);
-
/* Collect varyings after lowering I/O */
pan_nir_collect_varyings(nir, info);
#include "panfrost/util/pan_ir.h"
#include "util/u_dynarray.h"
-void midgard_preprocess_nir(nir_shader *nir,
- const struct panfrost_compile_inputs *inputs);
+void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id);
void midgard_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
bool no_ubo_to_push;
enum pipe_format rt_formats[8];
- uint8_t raw_fmt_mask;
/* Used on Valhall.
*
union {
struct {
- bool static_rt_conv;
uint32_t rt_conv[8];
} bifrost;
};
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
+ pan_shader_preprocess(b.shader, inputs.gpu_id);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
shader_info->push.count = 4;
.no_ubo_to_push = true,
};
- pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
- cfg.memory_format = (dstcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
- cfg.register_format = dstcompsz == 2 ?
- MALI_REGISTER_FILE_FORMAT_U16 :
- MALI_REGISTER_FILE_FORMAT_U32;
- }
- inputs.bifrost.static_rt_conv = true;
-
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
+ pan_shader_preprocess(b.shader, inputs.gpu_id);
+ NIR_PASS_V(b.shader, GENX(pan_inline_rt_conversion), pdev, &dstfmt);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
shader_info->fs.sample_shading = is_ms;
.no_ubo_to_push = true,
};
- pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
- cfg.memory_format = (imgcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
- cfg.register_format = imgcompsz == 2 ?
- MALI_REGISTER_FILE_FORMAT_U16 :
- MALI_REGISTER_FILE_FORMAT_U32;
- }
- inputs.bifrost.static_rt_conv = true;
-
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
+ pan_shader_preprocess(b.shader, inputs.gpu_id);
+
+ enum pipe_format rt_formats[8] = {key.imgfmt};
+ NIR_PASS_V(b.shader, GENX(pan_inline_rt_conversion), pdev, rt_formats);
+
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2img_info), 4);
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
+ pan_shader_preprocess(b.shader, inputs.gpu_id);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_img2buf_info), 4);
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
+ pan_shader_preprocess(b.shader, inputs.gpu_id);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2buf_info), 4);
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
+ pan_shader_preprocess(b.shader, inputs.gpu_id);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_fill_buf_info), 4);
rt_state->equation.alpha_dst_factor = BLEND_FACTOR_ZERO;
rt_state->equation.alpha_invert_dst_factor = false;
lower_blend = true;
-
- inputs->bifrost.static_rt_conv = true;
- inputs->bifrost.rt_conv[rt] =
- GENX(pan_blend_get_internal_desc)(pdev, fmt, rt, 32, false) >> 32;
}
if (lower_blend) {
nir_print_shader(nir, stderr);
}
+ pan_shader_preprocess(nir, inputs.gpu_id);
+
+ if (stage == MESA_SHADER_FRAGMENT) {
+ enum pipe_format rt_formats[MAX_RTS] = {PIPE_FORMAT_NONE};
+
+ for (unsigned rt = 0; rt < MAX_RTS; ++rt)
+ rt_formats[rt] = blend_state->rts[rt].format;
+
+ NIR_PASS_V(nir, GENX(pan_inline_rt_conversion), pdev, rt_formats);
+ }
+
GENX(pan_shader_compile)(nir, &inputs, &shader->binary, &shader->info);
/* System values shouldn't have changed */