From: Rob Clark Date: Fri, 10 Aug 2018 15:57:26 +0000 (-0400) Subject: freedreno/ir3: stop hard-coding FS input regs X-Git-Tag: upstream/19.0.0~3196 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fdd35f497bd9fe840b5bcc4a21464967e7abf866;p=platform%2Fupstream%2Fmesa.git freedreno/ir3: stop hard-coding FS input regs We originally did this because at the time we didn't know all the bitfields to configure where various frag shader sysvals went. But now we do. So switch to using sysvals for all the frag shader inputs. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 64eeb10..b6f8ff3 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -140,7 +140,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, const struct ir3_info *vsi, *fsi; enum a3xx_instrbuffermode fpbuffer, vpbuffer; uint32_t fpbuffersz, vpbuffersz, fsoff; - uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0}; + uint32_t pos_regid, posz_regid, psize_regid; + uint32_t vcoord_regid, face_regid, coord_regid, zwcoord_regid; + uint32_t color_regid[4] = {0}; int constmode; int i, j; @@ -208,6 +210,11 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3); } + face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE); + coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD); + zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); + vcoord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_VARYING_COORD); + /* adjust regids for alpha output formats.
there is no alpha render * format, so it's just treated like red */ @@ -230,10 +237,11 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | - COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(regid(0,0)) | - A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(regid(0,2)))); - OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); - OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid)); + A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) | + A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid)); + OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) | + A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid)); + OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(vcoord_regid)); OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) | A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) | A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz)); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 7c399d9..bfe2be6 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -202,7 +202,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, { struct stage s[MAX_STAGES]; uint32_t pos_regid, posz_regid, psize_regid, color_regid[8]; - uint32_t face_regid, coord_regid, zwcoord_regid; + uint32_t face_regid, coord_regid, zwcoord_regid, vcoord_regid; enum a3xx_threadsize fssz; int constmode; int i, j; @@ -244,10 +244,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } - /* TODO get these dynamically: */ - face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); - coord_regid = s[FS].v->frag_coord ? 
regid(0,0) : regid(63,0); - zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); + face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE); + coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); + zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); + vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_VARYING_COORD); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. @@ -273,7 +273,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) | 0x3f3f000 | /* XXX */ A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid)); - OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) | + OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(vcoord_regid) | 0xfcfcfc00); OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index 286411e..ba3339f 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -350,13 +350,12 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } - samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID); + samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID); samp_mask_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN); - /* TODO get these dynamically: */ - face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); - coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); - zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); - vcoord_regid = (s[FS].v->total_in > 0) ? 
s[FS].v->pos_regid : regid(63,0); + face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE); + coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); + zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); + vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_VARYING_COORD); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 44ee5b2..e4979a6 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -71,7 +71,7 @@ struct ir3_context { struct ir3_instruction *frag_vcoord; /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */ - struct ir3_instruction *frag_face, *frag_coord[4]; + struct ir3_instruction *frag_face, *frag_coord; /* For vertex shaders, keep track of the system values sources */ struct ir3_instruction *vertex_id, *basevertex, *instance_id; @@ -781,43 +781,6 @@ create_frag_input(struct ir3_context *ctx, bool use_ldlv) } static struct ir3_instruction * -create_frag_coord(struct ir3_context *ctx, unsigned comp) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *instr; - - compile_assert(ctx, !ctx->frag_coord[comp]); - - ctx->frag_coord[comp] = create_input(ctx, 0); - - switch (comp) { - case 0: /* .x */ - case 1: /* .y */ - /* for frag_coord, we get unsigned values.. 
we need - * to subtract (integer) 8 and divide by 16 (right- - * shift by 4) then convert to float: - * - * sub.s tmp, src, 8 - * shr.b tmp, tmp, 4 - * mov.u32f32 dst, tmp - * - */ - instr = ir3_SUB_S(block, ctx->frag_coord[comp], 0, - create_immed(block, 8), 0); - instr = ir3_SHR_B(block, instr, 0, - create_immed(block, 4), 0); - instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32); - - return instr; - case 2: /* .z */ - case 3: /* .w */ - default: - /* seems that we can use these as-is: */ - return ctx->frag_coord[comp]; - } -} - -static struct ir3_instruction * create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp) { /* first four vec4 sysval's reserved for UBOs: */ @@ -2448,6 +2411,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) if (!ctx->frag_face) { ctx->so->frag_face = true; ctx->frag_face = create_input(ctx, 0); + add_sysval_input(ctx, SYSTEM_VALUE_FRONT_FACE, ctx->frag_face); ctx->frag_face->regs[0]->flags |= IR3_REG_HALF; } /* for fragface, we get -1 for back and 0 for front. However this is @@ -3229,6 +3193,46 @@ emit_function(struct ir3_context *ctx, nir_function_impl *impl) ir3_END(ctx->block); } +static struct ir3_instruction * +create_frag_coord(struct ir3_context *ctx, unsigned comp) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *instr; + + if (!ctx->frag_coord) { + ctx->frag_coord = create_input_compmask(ctx, 0, 0xf); + /* defer add_sysval_input() until after all inputs created */ + } + + split_dest(block, &instr, ctx->frag_coord, comp, 1); + + switch (comp) { + case 0: /* .x */ + case 1: /* .y */ + /* for frag_coord, we get unsigned values.. 
we need + * to subtract (integer) 8 and divide by 16 (right- + * shift by 4) then convert to float: + * + * sub.s tmp, src, 8 + * shr.b tmp, tmp, 4 + * mov.u32f32 dst, tmp + * + */ + instr = ir3_SUB_S(block, instr, 0, + create_immed(block, 8), 0); + instr = ir3_SHR_B(block, instr, 0, + create_immed(block, 4), 0); + instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32); + + return instr; + case 2: /* .z */ + case 3: /* .w */ + default: + /* seems that we can use these as-is: */ + return instr; + } +} + static void setup_input(struct ir3_context *ctx, nir_variable *in) { @@ -3406,7 +3410,7 @@ max_drvloc(struct exec_list *vars) } static const unsigned max_sysvals[SHADER_MAX] = { - [SHADER_FRAGMENT] = 8, + [SHADER_FRAGMENT] = 24, // TODO [SHADER_VERTEX] = 16, [SHADER_COMPUTE] = 16, // TODO how many do we actually need? }; @@ -3433,17 +3437,17 @@ emit_instructions(struct ir3_context *ctx) ninputs -= max_sysvals[ctx->so->type]; - /* for fragment shader, we have a single input register (usually - * r0.xy) which is used as the base for bary.f varying fetch instrs: + /* for fragment shader, the vcoord input register is used as the + * base for bary.f varying fetch instrs: */ + struct ir3_instruction *vcoord = NULL; if (ctx->so->type == SHADER_FRAGMENT) { - // TODO maybe a helper for fi since we need it a few places.. 
- struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->block, OPC_META_FI); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */ - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */ - ctx->frag_vcoord = instr; + struct ir3_instruction *xy[2]; + + vcoord = create_input_compmask(ctx, 0, 0x3); + split_dest(ctx->block, xy, vcoord, 0, 2); + + ctx->frag_vcoord = create_collect(ctx, xy, 2); } /* Setup inputs: */ @@ -3451,6 +3455,19 @@ emit_instructions(struct ir3_context *ctx) setup_input(ctx, var); } + /* Defer add_sysval_input() stuff until after setup_inputs(), + * because sysvals need to be appended after varyings: + */ + if (vcoord) { + add_sysval_input_compmask(ctx, SYSTEM_VALUE_VARYING_COORD, + 0x3, vcoord); + } + + if (ctx->frag_coord) { + add_sysval_input_compmask(ctx, SYSTEM_VALUE_FRAG_COORD, + 0xf, ctx->frag_coord); + } + /* Setup outputs: */ nir_foreach_variable(var, &ctx->s->outputs) { setup_output(ctx, var); @@ -3470,76 +3487,26 @@ emit_instructions(struct ir3_context *ctx) emit_function(ctx, fxn); } -/* from NIR perspective, we actually have inputs. But most of the "inputs" - * for a fragment shader are just bary.f instructions. The *actual* inputs - * from the hw perspective are the frag_vcoord and optionally frag_coord and - * frag_face. +/* from NIR perspective, we actually have varying inputs. But the varying + * inputs, from an IR standpoint, are just bary.f/ldlv instructions. The + * only actual inputs are the sysvals. 
*/ static void fixup_frag_inputs(struct ir3_context *ctx) { struct ir3_shader_variant *so = ctx->so; struct ir3 *ir = ctx->ir; - struct ir3_instruction **inputs; - struct ir3_instruction *instr; - int n, regid = 0; - - ir->ninputs = 0; - - n = 4; /* always have frag_vcoord */ - n += COND(so->frag_face, 4); - n += COND(so->frag_coord, 4); + unsigned i = 0; - inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *))); + /* sysvals should appear at the end of the inputs, drop everything else: */ + while ((i < so->inputs_count) && !so->inputs[i].sysval) + i++; - if (so->frag_face) { - /* this ultimately gets assigned to hr0.x so doesn't conflict - * with frag_coord/frag_vcoord.. - */ - inputs[ir->ninputs++] = ctx->frag_face; - ctx->frag_face->regs[0]->num = 0; + /* at IR level, inputs are always blocks of 4 scalars: */ + i *= 4; - /* remaining channels not used, but let's avoid confusing - * other parts that expect inputs to come in groups of vec4 - */ - inputs[ir->ninputs++] = NULL; - inputs[ir->ninputs++] = NULL; - inputs[ir->ninputs++] = NULL; - } - - /* since we don't know where to set the regid for frag_coord, - * we have to use r0.x for it. 
But we don't want to *always* - * use r1.x for frag_vcoord as that could increase the register - * footprint on simple shaders: - */ - if (so->frag_coord) { - ctx->frag_coord[0]->regs[0]->num = regid++; - ctx->frag_coord[1]->regs[0]->num = regid++; - ctx->frag_coord[2]->regs[0]->num = regid++; - ctx->frag_coord[3]->regs[0]->num = regid++; - - inputs[ir->ninputs++] = ctx->frag_coord[0]; - inputs[ir->ninputs++] = ctx->frag_coord[1]; - inputs[ir->ninputs++] = ctx->frag_coord[2]; - inputs[ir->ninputs++] = ctx->frag_coord[3]; - } - - /* we always have frag_vcoord: */ - so->pos_regid = regid; - - /* r0.x */ - instr = create_input(ctx, ir->ninputs); - instr->regs[0]->num = regid++; - inputs[ir->ninputs++] = instr; - ctx->frag_vcoord->regs[1]->instr = instr; - - /* r0.y */ - instr = create_input(ctx, ir->ninputs); - instr->regs[0]->num = regid++; - inputs[ir->ninputs++] = instr; - ctx->frag_vcoord->regs[2]->instr = instr; - - ir->inputs = inputs; + ir->inputs = &ir->inputs[i]; + ir->ninputs -= i; } /* Fixup tex sampler state for astc/srgb workaround instructions. 
We diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 74d85ea..83bc375 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -1047,49 +1047,10 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block) static int ra_alloc(struct ir3_ra_ctx *ctx) { - unsigned n = 0; - - /* frag shader inputs get pre-assigned, since we have some - * constraints/unknowns about setup for some of these regs: - */ - if (ctx->type == SHADER_FRAGMENT) { - struct ir3 *ir = ctx->ir; - unsigned i = 0, j; - if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) { - struct ir3_instruction *instr = ir->inputs[i]; - int cls = size_to_class(1, true, false); - unsigned name = __ra_name(ctx, cls, instr); - unsigned reg = ctx->set->gpr_to_ra_reg[cls][0]; - - /* if we have frag_face, it gets hr0.x */ - ra_set_node_reg(ctx->g, name, reg); - i += 4; - } - - j = 0; - for (; i < ir->ninputs; i++) { - struct ir3_instruction *instr = ir->inputs[i]; - if (instr) { - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - - if (id->defn == instr) { - unsigned name, reg; - - name = ra_name(ctx, id); - reg = ctx->set->gpr_to_ra_reg[id->cls][j]; - - ra_set_node_reg(ctx->g, name, reg); - j += id->sz; - } - } - } - n = j; - } - /* pre-assign array elements: */ list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - unsigned base = n; + unsigned base = 0; if (arr->end_ip == 0) continue; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index b0663d5..7bb4263 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -97,18 +97,6 @@ fixup_regfootprint(struct ir3_shader_variant *v) int32_t regid = (v->outputs[i].regid + 3) >> 2; v->info.max_reg = MAX2(v->info.max_reg, regid); } - - if (v->type == SHADER_FRAGMENT) { - /* NOTE: not sure how to turn pos_regid off.. 
but this could - * be, for example, r1.x while max reg used by the shader is - * r0.*, in which case we need to fixup the reg footprint: - */ - v->info.max_reg = MAX2(v->info.max_reg, v->pos_regid >> 2); - if (v->frag_coord) - debug_assert(v->info.max_reg >= 0); /* hard coded r0.x */ - if (v->frag_face) - debug_assert(v->info.max_half_reg >= 0); /* hr0.x */ - } } /* wrapper for ir3_assemble() which does some info fixup based on @@ -518,7 +506,8 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) dump_output(out, so, VARYING_SLOT_PSIZ, "psize"); break; case SHADER_FRAGMENT: - dump_reg(out, "pos (bary)", so->pos_regid); + dump_reg(out, "pos (bary)", + ir3_find_sysval_regid(so, SYSTEM_VALUE_VARYING_COORD)); dump_output(out, so, FRAG_RESULT_DEPTH, "posz"); if (so->color0_mrt) { dump_output(out, so, FRAG_RESULT_COLOR, "color"); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 93182c7..507e89c 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -251,10 +251,6 @@ struct ir3_shader_variant { * + From the vert shader, we only need the output regid */ - /* for frag shader, pos_regid holds the frag_vcoord, ie. what is passed - * to bary.f instructions - */ - uint8_t pos_regid; bool frag_coord, frag_face, color0_mrt; /* NOTE: for input/outputs, slot is: