freedreno/a6xx: Program state for tessellation stages
author: Kristian H. Kristensen <hoegsberg@google.com>
Wed, 23 Oct 2019 03:03:07 +0000 (20:03 -0700)
committer: Kristian H. Kristensen <hoegsberg@google.com>
Fri, 8 Nov 2019 00:40:27 +0000 (16:40 -0800)
Signed-off-by: Kristian H. Kristensen <hoegsberg@google.com>
Acked-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Rob Clark <robdclark@gmail.com>
src/freedreno/registers/a6xx.xml
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.h
src/gallium/drivers/freedreno/a6xx/fd6_program.c

index 7f464f1..747f071 100644 (file)
@@ -1856,6 +1856,7 @@ to upconvert to 32b float internally?
 
        <reg32 offset="0x8000" name="GRAS_UNKNOWN_8000"/>
        <reg32 offset="0x8001" name="GRAS_UNKNOWN_8001"/>
+       <reg32 offset="0x8002" name="GRAS_UNKNOWN_8002"/>
        <reg32 offset="0x8003" name="GRAS_UNKNOWN_8003"/>
 
        <enum name="a6xx_layer_type">
@@ -1936,6 +1937,8 @@ to upconvert to 32b float internally?
                <bitfield name="GS_WRITES_LAYER" pos="0" type="boolean"/>
        </reg32>
 
+       <reg32 offset="0x809d" name="GRAS_UNKNOWN_809D"/>
+
        <reg32 offset="0x80a0" name="GRAS_UNKNOWN_80A0"/>
 
        <reg32 offset="0x80a2" name="GRAS_RAS_MSAA_CNTL">
@@ -2421,6 +2424,7 @@ to upconvert to 32b float internally?
        <!-- always 0x00ffff00 ? */ -->
        <reg32 offset="0x9101" name="VPC_UNKNOWN_9101"/>
        <reg32 offset="0x9102" name="VPC_UNKNOWN_9102"/>
+       <reg32 offset="0x9103" name="VPC_UNKNOWN_9103"/>
 
        <reg32 offset="0x9104" name="VPC_GS_SIV_CNTL"/>
 
@@ -2428,6 +2432,7 @@ to upconvert to 32b float internally?
                <bitfield name="LAYERLOC" low="0" high="7" type="uint"/>
        </reg32>
 
+       <reg32 offset="0x9106" name="VPC_UNKNOWN_9106"/>
        <reg32 offset="0x9107" name="VPC_UNKNOWN_9107"/>
        <reg32 offset="0x9108" name="VPC_UNKNOWN_9108"/>
 
index 149b27b..8a54895 100644 (file)
@@ -1338,7 +1338,6 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
 
        WRITE(REG_A6XX_VPC_SO_OVERRIDE, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
 
-       WRITE(REG_A6XX_PC_UNKNOWN_9801, 0);
        WRITE(REG_A6XX_PC_UNKNOWN_9806, 0);
        WRITE(REG_A6XX_PC_UNKNOWN_9980, 0);
 
index f2f1a7f..81cd4d9 100644 (file)
@@ -245,6 +245,22 @@ fd6_stage2shadersb(gl_shader_stage type)
        }
 }
 
+static inline enum a6xx_tess_spacing  /* Maps a gl_tess_spacing value to the matching a6xx_tess_spacing value. */
+fd6_gl2spacing(enum gl_tess_spacing spacing)  /* spacing: the GL-side tessellation spacing to translate */
+{
+       switch (spacing) {
+       case TESS_SPACING_EQUAL:
+               return TESS_EQUAL;
+       case TESS_SPACING_FRACTIONAL_ODD:
+               return TESS_FRACTIONAL_ODD;
+       case TESS_SPACING_FRACTIONAL_EVEN:
+               return TESS_FRACTIONAL_EVEN;
+       case TESS_SPACING_UNSPECIFIED:  /* deliberately shares the default path: no value is returned for it */
+       default:
+               unreachable("spacing must be specified");  /* any value other than the three above is treated as a caller error */
+       }
+}
+
 bool fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
                enum pipe_shader_type type, struct fd_texture_stateobj *tex,
                unsigned bcolor_offset,
index 087059b..5e54c7e 100644 (file)
@@ -277,6 +277,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
        uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
        uint32_t smask_in_regid, smask_regid;
        uint32_t vertex_regid, instance_regid, layer_regid, primitive_regid;
+       uint32_t hs_invocation_regid;
+       uint32_t tess_coord_x_regid, tess_coord_y_regid, hs_patch_regid, ds_patch_regid;
        uint32_t ij_pix_regid, ij_samp_regid, ij_cent_regid, ij_size_regid;
        uint32_t gs_header_regid;
        enum a3xx_threadsize fssz;
@@ -304,8 +306,25 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
        vertex_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID);
        instance_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID);
 
+       if (hs) {
+               tess_coord_x_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD);
+               tess_coord_y_regid = next_regid(tess_coord_x_regid, 1);
+               hs_patch_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID);
+               ds_patch_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID);
+               hs_invocation_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3);
+
+               pos_regid = ir3_find_output_regid(ds, VARYING_SLOT_POS);
+               psize_regid = ir3_find_output_regid(ds, VARYING_SLOT_PSIZ);
+       } else {
+               tess_coord_x_regid = regid(63, 0);
+               tess_coord_y_regid = regid(63, 0);
+               hs_patch_regid = regid(63, 0);
+               ds_patch_regid = regid(63, 0);
+               hs_invocation_regid = regid(63, 0);
+       }
+
        if (gs) {
-               gs_header_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_GS_HEADER_IR3);
+               gs_header_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3);
                primitive_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID);
                pos_regid = ir3_find_output_regid(gs, VARYING_SLOT_POS);
                psize_regid = ir3_find_output_regid(gs, VARYING_SLOT_PSIZ);
@@ -399,14 +418,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                        COND(vs->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
 
        struct ir3_shader_linkage l = {0};
-       if (gs)
-               ir3_link_shaders(&l, gs, fs);
-       else
-               ir3_link_shaders(&l, vs, fs);
-
-       const struct ir3_shader_variant *so_shader = fd6_last_shader(state);
-       if (so_shader->shader->stream_output.num_outputs > 0)
-               link_stream_out(&l, so_shader);
+       const struct ir3_shader_variant *last_shader = fd6_last_shader(state);
+       ir3_link_shaders(&l, last_shader, fs);
 
        BITSET_DECLARE(varbs, 128) = {0};
        uint32_t *varmask = (uint32_t *)varbs;
@@ -421,6 +434,10 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
        OUT_RING(ring, ~varmask[2]);  /* VPC_VAR[2].DISABLE */
        OUT_RING(ring, ~varmask[3]);  /* VPC_VAR[3].DISABLE */
 
+       /* Add stream out outputs after computing the VPC_VAR_DISABLE bitmask. */
+       if (last_shader->shader->stream_output.num_outputs > 0)
+               link_stream_out(&l, last_shader);
+
        if (VALIDREG(layer_regid)) {
                layer_loc = l.max_loc;
                ir3_link_add(&l, layer_regid, 0x1, l.max_loc);
@@ -436,13 +453,15 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
        }
 
-       if (so_shader->shader->stream_output.num_outputs > 0) {
-               setup_stream_out(state, so_shader, &l);
+       if (last_shader->shader->stream_output.num_outputs > 0) {
+               setup_stream_out(state, last_shader, &l);
        }
 
        debug_assert(l.cnt < 32);
        if (gs)
                OUT_PKT4(ring, REG_A6XX_SP_GS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2));
+       else if (ds)
+               OUT_PKT4(ring, REG_A6XX_SP_DS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2));
        else
                OUT_PKT4(ring, REG_A6XX_SP_VS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2));
 
@@ -462,6 +481,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
 
        if (gs)
                OUT_PKT4(ring, REG_A6XX_SP_GS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4));
+       else if (ds)
+               OUT_PKT4(ring, REG_A6XX_SP_DS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4));
        else
                OUT_PKT4(ring, REG_A6XX_SP_VS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4));
 
@@ -477,9 +498,89 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
        }
 
        fd6_emit_shader(ring, vs);
+       ir3_emit_immediates(screen, vs, ring);
 
-       OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1);
-       OUT_RING(ring, 0);
+       if (hs) {
+               OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1);
+               OUT_RING(ring, A6XX_SP_HS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
+                       A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) |
+                       A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack) |
+                       COND(hs->need_pixlod, A6XX_SP_HS_CTRL_REG0_PIXLODENABLE));
+
+               fd6_emit_shader(ring, hs);
+               ir3_emit_immediates(screen, hs, ring);
+               ir3_emit_link_map(screen, vs, hs, ring);
+
+               OUT_PKT4(ring, REG_A6XX_SP_DS_CTRL_REG0, 1);
+               OUT_RING(ring, A6XX_SP_DS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
+                       A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) |
+                       A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack) |
+                       COND(ds->need_pixlod, A6XX_SP_DS_CTRL_REG0_PIXLODENABLE));
+
+               fd6_emit_shader(ring, ds);
+               ir3_emit_immediates(screen, ds, ring);
+               ir3_emit_link_map(screen, hs, ds, ring);
+
+               shader_info *hs_info = &hs->shader->nir->info;
+               OUT_PKT4(ring, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
+               OUT_RING(ring, hs_info->tess.tcs_vertices_out);
+
+               /* Total attribute slots in HS incoming patch. */
+               OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9801, 1);
+               OUT_RING(ring, hs_info->tess.tcs_vertices_out * vs->shader->output_size / 4);
+
+               OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1);
+               OUT_RING(ring, vs->shader->output_size);
+
+               shader_info *ds_info = &ds->shader->nir->info;
+               OUT_PKT4(ring, REG_A6XX_PC_TESS_CNTL, 1);
+               uint32_t output;
+               if (ds_info->tess.point_mode)
+                       output = TESS_POINTS;
+               else if (ds_info->tess.primitive_mode == GL_ISOLINES)
+                       output = TESS_LINES;
+               else if (ds_info->tess.ccw)
+                       output = TESS_CCW_TRIS;
+               else
+                       output = TESS_CW_TRIS;
+
+               OUT_RING(ring, A6XX_PC_TESS_CNTL_SPACING(fd6_gl2spacing(ds_info->tess.spacing)) |
+                               A6XX_PC_TESS_CNTL_OUTPUT(output));
+
+               /* xxx: Misc tess unknowns: */
+               OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9103, 1);
+               OUT_RING(ring, 0x00ffff00);
+
+               OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9106, 1);
+               OUT_RING(ring, 0x0000ffff);
+
+               OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_809D, 1);
+               OUT_RING(ring, 0x0);
+
+               OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_8002, 1);
+               OUT_RING(ring, 0x0);
+
+               OUT_PKT4(ring, REG_A6XX_VPC_PACK, 1);
+               OUT_RING(ring, A6XX_VPC_PACK_POSITIONLOC(pos_loc) |
+                                A6XX_VPC_PACK_PSIZELOC(255) |
+                                A6XX_VPC_PACK_STRIDE_IN_VPC(l.max_loc));
+
+               OUT_PKT4(ring, REG_A6XX_VPC_PACK_3, 1);
+               OUT_RING(ring, A6XX_VPC_PACK_3_POSITIONLOC(pos_loc) |
+                                A6XX_VPC_PACK_3_PSIZELOC(psize_loc) |
+                                A6XX_VPC_PACK_3_STRIDE_IN_VPC(l.max_loc));
+
+               OUT_PKT4(ring, REG_A6XX_SP_DS_PRIMITIVE_CNTL, 1);
+               OUT_RING(ring, A6XX_SP_DS_PRIMITIVE_CNTL_DSOUT(l.cnt));
+
+               OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_4, 1);
+               OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_4_STRIDE_IN_VPC(l.max_loc) |
+                               CONDREG(psize_regid, 0x100));
+
+       } else {
+               OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1);
+               OUT_RING(ring, 0);
+       }
 
        OUT_PKT4(ring, REG_A6XX_SP_PRIMITIVE_CNTL, 1);
        OUT_RING(ring, A6XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt));
@@ -495,6 +596,9 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
        OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(l.max_loc) |
                        CONDREG(psize_regid, A6XX_PC_PRIMITIVE_CNTL_1_PSIZE));
 
+       OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1);
+       OUT_RING(ring, 0);
+
        OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5);
        OUT_RING(ring, 0x7);                /* XXX */
        OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
@@ -595,7 +699,10 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
 
                fd6_emit_shader(ring, gs);
                ir3_emit_immediates(screen, gs, ring);
-               ir3_emit_link_map(screen, vs, gs, ring);
+               if (ds)
+                       ir3_emit_link_map(screen, ds, gs, ring);
+               else
+                       ir3_emit_link_map(screen, vs, gs, ring);
 
                OUT_PKT4(ring, REG_A6XX_VPC_PACK_GS, 1);
                OUT_RING(ring, A6XX_VPC_PACK_GS_POSITIONLOC(pos_loc) |
@@ -646,19 +753,15 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9100, 1);
                OUT_RING(ring, 0xff);
 
-               OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9101, 1);
-               OUT_RING(ring, 0xffff00);
-
                OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9102, 1);
                OUT_RING(ring, 0xffff00);
 
-               OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9107, 1);
-               OUT_RING(ring, 0);
+               const struct ir3_shader_variant *prev = state->ds ? state->ds : state->vs;
 
                /* Size of per-primitive alloction in ldlw memory in vec4s. */
                uint32_t vec4_size =
                        gs->shader->nir->info.gs.vertices_in *
-                       DIV_ROUND_UP(vs->shader->output_size, 4);
+                       DIV_ROUND_UP(prev->shader->output_size, 4);
                OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
                OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size));
 
@@ -666,9 +769,21 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                OUT_RING(ring, 0);
 
                OUT_PKT4(ring, REG_A6XX_SP_GS_UNKNOWN_A871, 1);
-               OUT_RING(ring, 3);
+               OUT_RING(ring, prev->shader->output_size);
+       } else {
+               OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
+               OUT_RING(ring, 0);
+               OUT_PKT4(ring, REG_A6XX_SP_GS_UNKNOWN_A871, 1);
+               OUT_RING(ring, 0);
        }
 
+       OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9101, 1);
+       OUT_RING(ring, 0xffff00);
+
+       OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9107, 1);
+       OUT_RING(ring, 0);
+
+
        if (!binning_pass) {
                /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
                for (j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
@@ -701,11 +816,11 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                        A6XX_VFD_CONTROL_1_REGID4INST(instance_regid) |
                        A6XX_VFD_CONTROL_1_REGID4PRIMID(primitive_regid) |
                        0xfc000000);
-       OUT_RING(ring, A6XX_VFD_CONTROL_2_REGID_HSPATCHID(regid(63,0)) |
-                       A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(regid(63,0)));
-       OUT_RING(ring, A6XX_VFD_CONTROL_3_REGID_DSPATCHID(regid(63,0)) |
-                       A6XX_VFD_CONTROL_3_REGID_TESSX(regid(63,0)) |
-                       A6XX_VFD_CONTROL_3_REGID_TESSY(regid(63,0)) |
+       OUT_RING(ring, A6XX_VFD_CONTROL_2_REGID_HSPATCHID(hs_patch_regid) |
+                       A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid));
+       OUT_RING(ring, A6XX_VFD_CONTROL_3_REGID_DSPATCHID(ds_patch_regid) |
+                       A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) |
+                       A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) |
                        0xfc);
        OUT_RING(ring, 0x000000fc);   /* VFD_CONTROL_4 */
        OUT_RING(ring, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gs_header_regid) |
@@ -720,13 +835,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
        OUT_PKT4(ring, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1);
        OUT_RING(ring, COND(fragz, A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z));
 
-       ir3_emit_immediates(screen, vs, ring);
-
-       if (hs) {
-               ir3_emit_immediates(screen, hs, ring);
-               ir3_emit_immediates(screen, ds, ring);
-       }
-
        if (!binning_pass)
                ir3_emit_immediates(screen, fs, ring);
 }