From dda1ae9b3cb9ea39c9435fba01c6c31a99c4d35e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 18 Sep 2018 15:53:54 -0700 Subject: [PATCH] gallium/ttn: Convert inputs and outputs to derefs of variables. This means that TTN shaders more closely resemble GTN shaders: they have inputs and outputs as variable derefs, with the variables having their .driver_location already set up for you. This will be useful for v3d to do input variable DCE in NIR, which we can't do when the TTN shaders never have a pre-nir_lower_io stage. Acked-by: Rob Clark --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 114 ++++++++++++------------- src/gallium/drivers/freedreno/ir3/ir3_shader.c | 5 +- src/gallium/drivers/v3d/v3d_program.c | 7 +- src/gallium/drivers/vc4/vc4_program.c | 7 +- 4 files changed, 64 insertions(+), 69 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 4f7f900..0ad274b 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -65,6 +65,9 @@ struct ttn_compile { nir_register *addr_reg; + nir_variable **inputs; + nir_variable **outputs; + /** * Stack of nir_cursors where instructions should be pushed as we pop * back out of the control flow stack. @@ -301,6 +304,7 @@ ttn_emit_declaration(struct ttn_compile *c) } exec_list_push_tail(&b->shader->inputs, &var->node); + c->inputs[idx] = var; for (int i = 0; i < array_size; i++) b->shader->info.inputs_read |= 1 << (var->data.location + i); @@ -368,6 +372,7 @@ ttn_emit_declaration(struct ttn_compile *c) } exec_list_push_tail(&b->shader->outputs, &var->node); + c->outputs[idx] = var; for (int i = 0; i < array_size; i++) b->shader->info.outputs_written |= 1 << (var->data.location + i); @@ -504,45 +509,41 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, } case TGSI_FILE_INPUT: + /* Special case: Turn the frontface varying into a load of the + * frontface intrinsic plus math, and appending the silly floats. + */ + if (c->scan->processor == PIPE_SHADER_FRAGMENT && + c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) { + nir_ssa_def *tgsi_frontface[4] = { + nir_bcsel(&c->build, + nir_load_system_value(&c->build, + nir_intrinsic_load_front_face, 0), + nir_imm_float(&c->build, 1.0), + nir_imm_float(&c->build, -1.0)), + nir_imm_float(&c->build, 0.0), + nir_imm_float(&c->build, 0.0), + nir_imm_float(&c->build, 1.0), + }; + + return nir_src_for_ssa(nir_vec(&c->build, tgsi_frontface, 4)); + } else { + /* Indirection on input arrays isn't supported by TTN. */ + assert(!dim); + nir_deref_instr *deref = nir_build_deref_var(&c->build, + c->inputs[index]); + return nir_src_for_ssa(nir_load_deref(&c->build, deref)); + } + break; + case TGSI_FILE_CONSTANT: { nir_intrinsic_instr *load; nir_intrinsic_op op; unsigned srcn = 0; - switch (file) { - case TGSI_FILE_INPUT: - /* Special case: Turn the frontface varying into a load of the - * frontface intrinsic plus math, and appending the silly floats. - */ - if (c->scan->processor == PIPE_SHADER_FRAGMENT && - c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) { - nir_ssa_def *tgsi_frontface[4] = { - nir_bcsel(&c->build, - nir_load_system_value(&c->build, - nir_intrinsic_load_front_face, 0), - nir_imm_float(&c->build, 1.0), - nir_imm_float(&c->build, -1.0)), - nir_imm_float(&c->build, 0.0), - nir_imm_float(&c->build, 0.0), - nir_imm_float(&c->build, 1.0), - }; - - return nir_src_for_ssa(nir_vec(&c->build, tgsi_frontface, 4)); - } - - op = nir_intrinsic_load_input; - assert(!dim); - break; - case TGSI_FILE_CONSTANT: - if (dim && (dim->Index > 0 || dim->Indirect)) { - op = nir_intrinsic_load_ubo; - } else { - op = nir_intrinsic_load_uniform; - } - break; - default: - unreachable("No other load files supported"); - break; + if (dim && (dim->Index > 0 || dim->Indirect)) { + op = nir_intrinsic_load_ubo; + } else { + op = nir_intrinsic_load_uniform; } load = nir_intrinsic_instr_create(b->shader, op); @@ -1758,35 +1759,25 @@ ttn_add_output_stores(struct ttn_compile *c) { nir_builder *b = &c->build; - foreach_list_typed(nir_variable, var, node, &b->shader->outputs) { - unsigned array_len = MAX2(glsl_get_length(var->type), 1); - unsigned i; - - for (i = 0; i < array_len; i++) { - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); - unsigned loc = var->data.driver_location + i; + for (int i = 0; i < c->build.shader->num_outputs; i++) { + nir_variable *var = c->outputs[i]; + if (!var) + continue; - nir_src src = nir_src_for_reg(c->output_regs[loc].reg); - src.reg.base_offset = c->output_regs[loc].offset; + nir_src src = nir_src_for_reg(c->output_regs[i].reg); + src.reg.base_offset = c->output_regs[i].offset; - if (c->build.shader->info.stage == MESA_SHADER_FRAGMENT && - var->data.location == FRAG_RESULT_DEPTH) { - /* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output, while - * NIR uses a single float FRAG_RESULT_DEPTH. - */ - src = nir_src_for_ssa(nir_channel(b, nir_ssa_for_src(b, src, 4), 2)); - store->num_components = 1; - } else { - store->num_components = 4; - } - store->src[0] = src; - - nir_intrinsic_set_base(store, loc); - nir_intrinsic_set_write_mask(store, 0xf); - store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_builder_instr_insert(b, &store->instr); + nir_ssa_def *store_value = nir_ssa_for_src(b, src, 4); + if (c->build.shader->info.stage == MESA_SHADER_FRAGMENT && + var->data.location == FRAG_RESULT_DEPTH) { + /* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output, while + * NIR uses a single float FRAG_RESULT_DEPTH. + */ + store_value = nir_channel(b, store_value, 2); } + + nir_store_deref(b, nir_build_deref_var(b, var), store_value, + (1 << store_value->num_components) - 1); } } @@ -1814,6 +1805,9 @@ tgsi_to_nir(const void *tgsi_tokens, s->num_uniforms = scan.const_file_max[0] + 1; s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; + c->inputs = rzalloc_array(c, struct nir_variable *, s->num_inputs); + c->outputs = rzalloc_array(c, struct nir_variable *, s->num_outputs); + c->output_regs = rzalloc_array(c, struct ttn_reg_info, scan.file_max[TGSI_FILE_OUTPUT] + 1); c->temp_regs = rzalloc_array(c, struct ttn_reg_info, diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 125bf3b..63922bf 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -309,9 +309,6 @@ ir3_shader_create(struct ir3_compiler *compiler, if (cso->type == PIPE_SHADER_IR_NIR) { /* we take ownership of the reference: */ nir = cso->ir.nir; - - NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, - (nir_lower_io_options)0); } else { debug_assert(cso->type == PIPE_SHADER_IR_TGSI); if (fd_mesa_debug & FD_DBG_DISASM) { @@ -320,6 +317,8 @@ ir3_shader_create(struct ir3_compiler *compiler, } nir = ir3_tgsi_to_nir(cso->tokens); } + NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, + (nir_lower_io_options)0); /* do first pass optimization, ignoring the key: */ shader->nir = ir3_optimize_nir(shader, nir, NULL); if (fd_mesa_debug & FD_DBG_DISASM) { diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c index 8555458..e9fae77 100644 --- a/src/gallium/drivers/v3d/v3d_program.c +++ b/src/gallium/drivers/v3d/v3d_program.c @@ -193,9 +193,6 @@ v3d_shader_state_create(struct pipe_context *pctx, */ s = cso->ir.nir; - NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform, - type_size, - (nir_lower_io_options)0); NIR_PASS_V(s, nir_lower_io, nir_var_uniform, uniforms_type_size, (nir_lower_io_options)0); @@ -213,6 +210,10 @@ v3d_shader_state_create(struct pipe_context *pctx, so->was_tgsi = true; } + NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform, + type_size, + (nir_lower_io_options)0); + NIR_PASS_V(s, nir_opt_global_to_local); NIR_PASS_V(s, nir_lower_regs_to_ssa); NIR_PASS_V(s, nir_normalize_cubemap_coords); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 3c43980..f734d12 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2487,9 +2487,6 @@ vc4_shader_state_create(struct pipe_context *pctx, */ s = cso->ir.nir; - NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform, - type_size, - (nir_lower_io_options)0); NIR_PASS_V(s, nir_lower_io, nir_var_uniform, uniforms_type_size, (nir_lower_io_options)0); @@ -2505,6 +2502,10 @@ vc4_shader_state_create(struct pipe_context *pctx, s = tgsi_to_nir(cso->tokens, &nir_options); } + NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform, + type_size, + (nir_lower_io_options)0); + NIR_PASS_V(s, nir_opt_global_to_local); NIR_PASS_V(s, nir_lower_regs_to_ssa); NIR_PASS_V(s, nir_normalize_cubemap_coords); -- 2.7.4