* But TGSI doesn't know that, it still declares things as
* IN[] registers. So we do all the input tracking normally
* and fix things up after compile_instructions()
+ *
+ * NOTE that frag_pos is the hardware position (possibly it
+ * is actually an index or tag or some such.. it is *not*
+ * values that can be directly used for gl_FragCoord..)
*/
- struct ir3_instruction *frag_pos;
+ struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4];
struct tgsi_parse_context parser;
unsigned type;
ctx->current_instr = NULL;
ctx->num_output_updates = 0;
ctx->atomic = false;
+ ctx->frag_pos = NULL;
+ ctx->frag_face = NULL;
+
+ memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord));
#define FM(x) (1 << TGSI_FILE_##x)
/* optimize can't deal with relative addressing: */
* position)
*/
if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- nin = MAX2(2, nin);
+ int n = 2;
+ if (ctx->info.reads_position)
+ n += 4;
+ if (ctx->info.uses_frontface)
+ n += 4;
+ nin = MAX2(n, nin);
nout += ARRAY_SIZE(ctx->kill);
}
} else {
return fd3_semantic_name(sem->Name, sem->Index);
}
+/* Build the bary.f instruction that fetches one scalar component of a
+ * fragment shader varying:
+ *
+ *   bary.f dst, #inloc, r0.x
+ *
+ * regid is stored on the (dummy) destination register and inloc becomes
+ * the immediate source.  The last source is an SSA reference to
+ * ctx->frag_pos with a two component write mask.  The j argument is not
+ * used by this helper.
+ *
+ * Returns the newly created instruction.
+ */
+static struct ir3_instruction *
+decl_in_frag_bary(struct fd3_compile_context *ctx, unsigned regid,
+		unsigned j, unsigned inloc)
+{
+	struct ir3_instruction *bary = instr_create(ctx, 2, OPC_BARY_F);
+	struct ir3_register *loc, *pos;
+
+	/* dummy dst */
+	ir3_reg_create(bary, regid, 0);
+
+	/* immediate source: location of the varying component */
+	loc = ir3_reg_create(bary, 0, IR3_REG_IMMED);
+	loc->iim_val = inloc;
+
+	/* SSA source: the frag_pos instruction (two components) */
+	pos = ir3_reg_create(bary, 0, IR3_REG_SSA);
+	pos->instr = ctx->frag_pos;
+	pos->wrmask = 0x3;
+
+	return bary;
+}
+
+/* TGSI_SEMANTIC_POSITION
+ * """"""""""""""""""""""
+ *
+ * For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that
+ * fragment shader input contains the fragment's window position. The X
+ * component starts at zero and always increases from left to right.
+ * The Y component starts at zero and always increases but Y=0 may either
+ * indicate the top of the window or the bottom depending on the fragment
+ * coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN).
+ * The Z coordinate ranges from 0 to 1 to represent depth from the front
+ * to the back of the Z buffer. The W component contains the reciprocal
+ * of the interpolated vertex position W component.
+ *
+ * This helper handles a single channel j (0..3 for x..w): it creates the
+ * input instruction for that channel, remembers it in ctx->frag_coord[j],
+ * and returns the instruction whose result is the value for the channel.
+ * regid is set on the dummy destination of every instruction created for
+ * the x/y conversion.
+ *
+ * An out-of-range channel is reported through compile_error() and an
+ * immediate 0.0 is returned; ctx->frag_coord[] is left untouched in that
+ * case.
+ */
+static struct ir3_instruction *
+decl_in_frag_coord(struct fd3_compile_context *ctx, unsigned regid,
+		unsigned j)
+{
+	struct ir3_instruction *instr, *src;
+
+	/* validate the channel *before* it is used as an index, otherwise
+	 * a bad value would read and write past the end of frag_coord[]:
+	 */
+	if (j >= ARRAY_SIZE(ctx->frag_coord)) {
+		compile_error(ctx, "invalid channel\n");
+		return create_immed(ctx, 0.0);
+	}
+
+	/* each channel may only be declared once: */
+	compile_assert(ctx, !ctx->frag_coord[j]);
+
+	ctx->frag_coord[j] = create_input(ctx->block, NULL, 0);
+
+	switch (j) {
+	case 0: /* .x */
+	case 1: /* .y */
+		/* for frag_coord, we get unsigned values.. we need
+		 * to subtract (integer) 8 and divide by 16 (right-
+		 * shift by 4) then convert to float:
+		 */
+
+		/* add.s tmp, src, -8 */
+		instr = instr_create(ctx, 2, OPC_ADD_S);
+		ir3_reg_create(instr, regid, 0);    /* dummy dst */
+		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_coord[j];
+		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -8;
+		src = instr;
+
+		/* shr.b tmp, tmp, 4 */
+		instr = instr_create(ctx, 2, OPC_SHR_B);
+		ir3_reg_create(instr, regid, 0);    /* dummy dst */
+		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4;
+		src = instr;
+
+		/* mov.u32f32 dst, tmp */
+		instr = instr_create(ctx, 1, 0);
+		instr->cat1.src_type = TYPE_U32;
+		instr->cat1.dst_type = TYPE_F32;
+		ir3_reg_create(instr, regid, 0);    /* dummy dst */
+		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+
+		break;
+	case 2: /* .z */
+	case 3: /* .w */
+	default: /* not reached, j was range checked above */
+		/* seems that we can use these as-is: */
+		instr = ctx->frag_coord[j];
+		break;
+	}
+
+	return instr;
+}
+
+/* TGSI_SEMANTIC_FACE
+ * """"""""""""""""""
+ *
+ * This label applies to fragment shader inputs only and indicates that
+ * the register contains front/back-face information of the form (F, 0,
+ * 0, 1). The first component will be positive when the fragment belongs
+ * to a front-facing polygon, and negative when the fragment belongs to a
+ * back-facing polygon.
+ *
+ * This helper produces the instruction for a single channel j of that
+ * vector: .y and .z are an immediate 0.0, .w is an immediate 1.0, and .x
+ * is computed from a newly created input that is remembered in
+ * ctx->frag_face.  Any other channel reports a compile error and yields
+ * an immediate 0.0.  regid is set on the dummy destination of each
+ * instruction created for the .x conversion.
+ */
+static struct ir3_instruction *
+decl_in_frag_face(struct fd3_compile_context *ctx, unsigned regid,
+		unsigned j)
+{
+	struct ir3_instruction *mul, *add, *mov;
+
+	/* the constant channels need no hardware input at all: */
+	if (j == 3)
+		return create_immed(ctx, 1.0);    /* .w */
+
+	if (j == 1 || j == 2)
+		return create_immed(ctx, 0.0);    /* .y, .z */
+
+	if (j != 0) {
+		compile_error(ctx, "invalid channel\n");
+		return create_immed(ctx, 0.0);
+	}
+
+	/* .x: the faceness input may only be declared once */
+	compile_assert(ctx, !ctx->frag_face);
+
+	ctx->frag_face = create_input(ctx->block, NULL, 0);
+
+	/* for faceness, we always get -1 or 0 (int).. but TGSI expects
+	 * positive vs negative float.. and piglit further seems to
+	 * expect -1.0 or 1.0, so scale and bias the integer (-1 -> -1,
+	 * 0 -> 1) and then convert it to float:
+	 */
+
+	/* mul.s tmp, hr0.x, 2 */
+	mul = instr_create(ctx, 2, OPC_MUL_S);
+	ir3_reg_create(mul, regid, 0);    /* dummy dst */
+	ir3_reg_create(mul, 0, IR3_REG_SSA)->instr = ctx->frag_face;
+	ir3_reg_create(mul, 0, IR3_REG_IMMED)->iim_val = 2;
+
+	/* add.s tmp, tmp, 1 */
+	add = instr_create(ctx, 2, OPC_ADD_S);
+	ir3_reg_create(add, regid, 0);    /* dummy dst */
+	ir3_reg_create(add, 0, IR3_REG_SSA)->instr = mul;
+	ir3_reg_create(add, 0, IR3_REG_IMMED)->iim_val = 1;
+
+	/* mov dst, tmp (converting S32 -> F32) */
+	mov = instr_create(ctx, 1, 0);
+	mov->cat1.src_type = TYPE_S32;
+	mov->cat1.dst_type = TYPE_F32;
+	ir3_reg_create(mov, regid, 0);    /* dummy dst */
+	ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = add;
+
+	return mov;
+}
+
static void
decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
struct fd3_shader_variant *so = ctx->so;
- unsigned i, flags = 0;
+ unsigned name = decl->Semantic.Name;
+ unsigned i;
/* I don't think we should get frag shader input without
* semantic info? Otherwise how do inputs get linked to
unsigned r = regid(i, 0);
unsigned ncomp, j;
- /* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
+ /* we'll figure out the actual components used after scheduling */
ncomp = 4;
DBG("decl in -> r%d", i);
so->inputs[n].compmask = (1 << ncomp) - 1;
so->inputs[n].regid = r;
so->inputs[n].inloc = ctx->next_inloc;
- ctx->next_inloc += ncomp;
-
- so->total_in += ncomp;
for (j = 0; j < ncomp; j++) {
- struct ir3_instruction *instr;
+ struct ir3_instruction *instr = NULL;
if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- struct ir3_register *src;
-
- instr = instr_create(ctx, 2, OPC_BARY_F);
-
- /* dst register: */
- ir3_reg_create(instr, r + j, flags);
-
- /* input position: */
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val =
- so->inputs[n].inloc + j - 8;
-
- /* input base (always r0.xy): */
- src = ir3_reg_create(instr, regid(0,0), IR3_REG_SSA);
- src->wrmask = 0x3;
- src->instr = ctx->frag_pos;
-
+ /* for fragment shaders, POSITION and FACE are handled
+ * specially, not using normal varying / bary.f
+ */
+ if (name == TGSI_SEMANTIC_POSITION) {
+ so->inputs[n].bary = false;
+ so->frag_coord = true;
+ instr = decl_in_frag_coord(ctx, r + j, j);
+ } else if (name == TGSI_SEMANTIC_FACE) {
+ so->inputs[n].bary = false;
+ so->frag_face = true;
+ instr = decl_in_frag_face(ctx, r + j, j);
+ } else {
+ so->inputs[n].bary = true;
+ instr = decl_in_frag_bary(ctx, r + j, j,
+ so->inputs[n].inloc + j - 8);
+ }
} else {
instr = create_input(ctx->block, NULL, (i * 4) + j);
}
ctx->block->inputs[(i * 4) + j] = instr;
}
+
+ if (so->inputs[n].bary || (ctx->type == TGSI_PROCESSOR_VERTEX)) {
+ ctx->next_inloc += ncomp;
+ so->total_in += ncomp;
+ }
}
}
ctx->so->samplers_count++;
}
+/* TGSI declares fragment shader inputs, but most of those "inputs" are
+ * really just bary.f instructions.  From the hw point of view the only
+ * true inputs are frag_pos plus, optionally, frag_coord and frag_face.
+ *
+ * Rebuild block->inputs[] from scratch so that it lists exactly those
+ * hardware inputs, assign their register numbers, record the register
+ * chosen for frag_pos in so->pos_regid, and hook two fresh input
+ * instructions up as sources 1 and 2 of ctx->frag_pos.
+ */
+static void
+fixup_frag_inputs(struct fd3_compile_context *ctx)
+{
+	struct fd3_shader_variant *so = ctx->so;
+	struct ir3_block *block = ctx->block;
+	int regid = 0;
+	unsigned c;
+
+	block->ninputs = 0;
+
+	if (so->frag_face) {
+		/* this ultimately gets assigned to hr0.x so doesn't conflict
+		 * with frag_coord/frag_pos..
+		 */
+		block->inputs[block->ninputs++] = ctx->frag_face;
+		ctx->frag_face->regs[0]->num = 0;
+
+		/* the other three channels are unused, but pad with NULL so
+		 * that code expecting inputs in groups of vec4 isn't confused
+		 */
+		for (c = 1; c < 4; c++)
+			block->inputs[block->ninputs++] = NULL;
+	}
+
+	/* since we don't know where to set the regid for frag_coord,
+	 * we have to use r0.x for it.  But we don't want to *always*
+	 * use r1.x for frag_pos as that could increase the register
+	 * footprint on simple shaders:
+	 */
+	if (so->frag_coord) {
+		for (c = 0; c < 4; c++)
+			ctx->frag_coord[c]->regs[0]->num = regid++;
+
+		for (c = 0; c < 4; c++)
+			block->inputs[block->ninputs++] = ctx->frag_coord[c];
+	}
+
+	/* we always have frag_pos, placed right after whatever was
+	 * assigned above:
+	 */
+	so->pos_regid = regid;
+
+	/* the two components (.x then .y) of the frag_pos base register: */
+	for (c = 0; c < 2; c++) {
+		struct ir3_instruction *in =
+			create_input(block, NULL, block->ninputs);
+
+		in->regs[0]->num = regid++;
+		block->inputs[block->ninputs++] = in;
+		ctx->frag_pos->regs[1 + c]->instr = in;
+	}
+}
+
static void
compile_instructions(struct fd3_compile_context *ctx)
{
push_block(ctx);
- /* for fragment shader, we have a single input register (r0.xy)
- * which is used as the base for bary.f varying fetch instrs:
+ /* for fragment shader, we have a single input register (usually
+ * r0.xy) which is used as the base for bary.f varying fetch instrs:
*/
if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
struct ir3_instruction *instr;
}
/* fixup actual inputs for frag shader: */
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- struct ir3_instruction *instr;
-
- ctx->block->ninputs = 2;
-
- /* r0.x */
- instr = create_input(ctx->block, NULL, 0);
- ctx->block->inputs[0] = instr;
- ctx->frag_pos->regs[1]->instr = instr;
-
- /* r0.y */
- instr = create_input(ctx->block, NULL, 1);
- ctx->block->inputs[1] = instr;
- ctx->frag_pos->regs[2]->instr = instr;
- }
+ if (ctx->type == TGSI_PROCESSOR_FRAGMENT)
+ fixup_frag_inputs(ctx);
}
static void
const struct tgsi_token *tokens, struct fd3_shader_key key)
{
struct fd3_compile_context ctx;
+ struct ir3_block *block;
unsigned i, actual_in;
int ret = 0;
compile_instructions(&ctx);
+ block = ctx.block;
+
/* at this point, we want the kill's in the outputs array too,
* so that they get scheduled (since they have no dst).. we've
* already ensured that the array is big enough in push_block():
*/
if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
- struct ir3_block *block = ctx.block;
for (i = 0; i < ctx.kill_count; i++)
block->outputs[block->noutputs++] = ctx.kill[i];
}
if (fd_mesa_debug & FD_DBG_OPTDUMP)
compile_dump(&ctx);
- ret = ir3_block_flatten(ctx.block);
+ ret = ir3_block_flatten(block);
if (ret < 0)
goto out;
if ((ret > 0) && (fd_mesa_debug & FD_DBG_OPTDUMP))
compile_dump(&ctx);
- ir3_block_cp(ctx.block);
+ ir3_block_cp(block);
if (fd_mesa_debug & FD_DBG_OPTDUMP)
compile_dump(&ctx);
- ir3_block_depth(ctx.block);
+ ir3_block_depth(block);
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER DEPTH:\n");
- ir3_dump_instr_list(ctx.block->head);
+ ir3_dump_instr_list(block->head);
}
- ir3_block_sched(ctx.block);
+ ir3_block_sched(block);
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER SCHED:\n");
- ir3_dump_instr_list(ctx.block->head);
+ ir3_dump_instr_list(block->head);
}
- ret = ir3_block_ra(ctx.block, so->type, key.half_precision);
+ ret = ir3_block_ra(block, so->type, key.half_precision,
+ so->frag_coord, so->frag_face);
if (ret)
goto out;
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER RA:\n");
- ir3_dump_instr_list(ctx.block->head);
+ ir3_dump_instr_list(block->head);
}
/* fixup input/outputs: */
for (i = 0; i < so->outputs_count; i++) {
- so->outputs[i].regid = ctx.block->outputs[i*4]->regs[0]->num;
+ so->outputs[i].regid = block->outputs[i*4]->regs[0]->num;
/* preserve hack for depth output.. tgsi writes depth to .z,
* but what we give the hw is the scalar register:
*/
for (i = 0; i < so->inputs_count; i++) {
unsigned j, regid = ~0, compmask = 0;
for (j = 0; j < 4; j++) {
- struct ir3_instruction *in = ctx.block->inputs[(i*4) + j];
+ struct ir3_instruction *in = block->inputs[(i*4) + j];
if (in) {
compmask |= (1 << j);
regid = in->regs[0]->num - j;
struct fd3_shader_variant *vp,
struct fd3_vertex_buf *vbufs, uint32_t n)
{
- uint32_t i;
+ uint32_t i, j, last = 0;
n = MIN2(n, vp->inputs_count);
- for (i = 0; i < n; i++) {
- struct pipe_resource *prsc = vbufs[i].prsc;
- struct fd_resource *rsc = fd_resource(prsc);
- enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(vbufs[i].format);
- bool switchnext = (i != (n - 1));
- uint32_t fs = util_format_get_blocksize(vbufs[i].format);
-
- OUT_PKT0(ring, REG_A3XX_VFD_FETCH(i), 2);
- OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
- A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vbufs[i].stride) |
- COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
- A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) |
- A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
-
- OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1);
- OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
- A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
- A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
- A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
- A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
- A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
- COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+ for (i = 0; i < n; i++)
+ if (vp->inputs[i].compmask)
+ last = i;
+
+ for (i = 0, j = 0; i <= last; i++) {
+ if (vp->inputs[i].compmask) {
+ struct pipe_resource *prsc = vbufs[i].prsc;
+ struct fd_resource *rsc = fd_resource(prsc);
+ enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(vbufs[i].format);
+ bool switchnext = (i != last);
+ uint32_t fs = util_format_get_blocksize(vbufs[i].format);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2);
+ OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
+ A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vbufs[i].stride) |
+ COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+ A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) |
+ A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(j), 1);
+ OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+ A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
+ A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+ A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
+ A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
+ A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ j++;
+ }
}
}
A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
}
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
- struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
- struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+ if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !key.binning_pass) {
+ uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control;
- if (!key.binning_pass) {
- struct fd3_context *fd3_ctx = fd3_context(ctx);
+ val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
+ val |= COND(fp->frag_coord, A3XX_RB_RENDER_CONTROL_XCOORD |
+ A3XX_RB_RENDER_CONTROL_YCOORD |
+ A3XX_RB_RENDER_CONTROL_ZCOORD |
+ A3XX_RB_RENDER_CONTROL_WCOORD);
- /* I suppose if we needed to (which I don't *think* we need
- * to), we could emit this for binning pass too. But we
- * would need to keep a different patch-list for binning
- * vs render pass.
- */
+ /* I suppose if we needed to (which I don't *think* we need
+ * to), we could emit this for binning pass too. But we
+ * would need to keep a different patch-list for binning
+ * vs render pass.
+ */
- OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
- OUT_RINGP(ring, zsa->rb_render_control,
- &fd3_ctx->rbrc_patches);
- }
+ OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+ OUT_RINGP(ring, val, &fd3_context(ctx)->rbrc_patches);
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
+ struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
+ struct pipe_stencil_ref *sr = &ctx->stencil_ref;
OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
OUT_RING(ring, zsa->rb_alpha_ref);
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
->gras_cl_clip_cntl;
- if (fp->writes_pos) {
- val |= A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE;
- }
+ val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
+ val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
+ A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, val);
}
return 0;
}
+/* Starting just after index i, find the next entry of so->inputs[] that
+ * has a non-zero compmask and is flagged as bary.  Returns that index,
+ * or a value that is not less than so->inputs_count when there is no
+ * such entry.  Pass -1 to begin the search at the first input.
+ */
+static int
+next_varying(const struct fd3_shader_variant *so, int i)
+{
+	for (i++; i < so->inputs_count; i++) {
+		if (so->inputs[i].compmask && so->inputs[i].bary)
+			return i;
+	}
+	return i;
+}
+
static uint32_t
find_output_regid(const struct fd3_shader_variant *so, fd3_semantic semantic)
{
const struct fd3_shader_variant *vp, *fp;
const struct ir3_shader_info *vsi, *fsi;
uint32_t pos_regid, posz_regid, psize_regid, color_regid;
- int i;
+ int i, j, k;
vp = fd3_shader_variant(prog->vp, key);
A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
- A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
+ A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
+ COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_ZWCOORD));
OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
- OUT_RING(ring, 0x00000000); /* HLSQ_CONTROL_3_REG */
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid));
OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vp->instrlen));
A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vsi->max_const, 0)));
OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
- A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->inputs_count));
+ A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4));
- for (i = 0; i < fp->inputs_count; ) {
+ for (i = 0, j = -1; j < (int)fp->inputs_count; i++) {
uint32_t reg = 0;
- int j;
- OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i/2), 1);
+ OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);
- j = find_output(vp, fp->inputs[i].semantic);
- reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[j].regid);
- reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[i].compmask);
- i++;
+ j = next_varying(fp, j);
+ if (j < fp->inputs_count) {
+ k = find_output(vp, fp->inputs[j].semantic);
+ reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid);
+ reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask);
+ }
- j = find_output(vp, fp->inputs[i].semantic);
- reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[j].regid);
- reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[i].compmask);
- i++;
+ j = next_varying(fp, j);
+ if (j < fp->inputs_count) {
+ k = find_output(vp, fp->inputs[j].semantic);
+ reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid);
+ reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask);
+ }
OUT_RING(ring, reg);
}
- for (i = 0; i < fp->inputs_count; ) {
+ for (i = 0, j = -1; j < (int)fp->inputs_count; i++) {
uint32_t reg = 0;
- OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i/4), 1);
-
- reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[i++].inloc);
- reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[i++].inloc);
- reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[i++].inloc);
- reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[i++].inloc);
+ OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);
+
+ j = next_varying(fp, j);
+ if (j < fp->inputs_count)
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[j].inloc);
+ j = next_varying(fp, j);
+ if (j < fp->inputs_count)
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[j].inloc);
+ j = next_varying(fp, j);
+ if (j < fp->inputs_count)
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[j].inloc);
+ j = next_varying(fp, j);
+ if (j < fp->inputs_count)
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[j].inloc);
OUT_RING(ring, reg);
}