From 9557cd39e2ed749493d7af2a8e094415e3cc252d Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 29 Apr 2013 03:27:29 +0800 Subject: [PATCH] ilo: implement GEN7 SO GPE functions They were just stubs before. --- src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c | 6 +- src/gallium/drivers/ilo/ilo_gpe_gen7.c | 174 ++++++++++++++++++++----- src/gallium/drivers/ilo/ilo_gpe_gen7.h | 10 +- 3 files changed, 150 insertions(+), 40 deletions(-) diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c index 91fa7f4..1b39b29 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c @@ -381,12 +381,12 @@ gen7_pipeline_sol(struct ilo_3d_pipeline *p, int i; for (i = 0; i < 4; i++) - p->gen7_3DSTATE_SO_BUFFER(p->dev, i, false, p->cp); + p->gen7_3DSTATE_SO_BUFFER(p->dev, i, 0, 0, NULL, p->cp); - p->gen7_3DSTATE_SO_DECL_LIST(p->dev, p->cp); + p->gen7_3DSTATE_SO_DECL_LIST(p->dev, NULL, NULL, p->cp); } - p->gen7_3DSTATE_STREAMOUT(p->dev, false, false, false, p->cp); + p->gen7_3DSTATE_STREAMOUT(p->dev, 0, 0, false, p->cp); } } diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/src/gallium/drivers/ilo/ilo_gpe_gen7.c index f6282b4..b703a69 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.c @@ -542,38 +542,58 @@ gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev, static void gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev, - bool enable, + unsigned buffer_mask, + int vertex_attrib_count, bool rasterizer_discard, - bool flatshade_first, struct ilo_cp *cp) { const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e); const uint8_t cmd_len = 3; + const bool enable = (buffer_mask != 0); uint32_t dw1, dw2; - int i; + int read_len; ILO_GPE_VALID_GEN(dev, 7, 7); if (!enable) { + dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT; + if (rasterizer_discard) + dw1 |= SO_RENDERING_DISABLE; + + dw2 = 0; + ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, (rasterizer_discard) ? SO_RENDERING_DISABLE : 0); - ilo_cp_write(cp, 0); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, dw2); ilo_cp_end(cp); return; } + read_len = (vertex_attrib_count + 1) / 2; + if (!read_len) + read_len = 1; + dw1 = SO_FUNCTION_ENABLE | - SO_STATISTICS_ENABLE; + 0 << SO_RENDER_STREAM_SELECT_SHIFT | + SO_STATISTICS_ENABLE | + buffer_mask << 8; + if (rasterizer_discard) dw1 |= SO_RENDERING_DISABLE; - if (!flatshade_first) + + /* API_OPENGL */ + if (true) dw1 |= SO_REORDER_TRAILING; - for (i = 0; i < 4; i++) - dw1 |= SO_BUFFER_ENABLE(i); - dw2 = 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT | - 0 << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT; + dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT | + 0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT | + 0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT | + 0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT | + 0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT | + 0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT | + 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT | + (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT; ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -991,33 +1011,111 @@ gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev, static void gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev, + const struct pipe_stream_output_info *so_info, + const struct ilo_shader *sh, struct ilo_cp *cp) { const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17); - uint8_t cmd_len; - uint16_t decls[128]; - int num_decls, i; + uint16_t cmd_len; + int buffer_selects, num_entries, i; + uint16_t so_decls[128]; ILO_GPE_VALID_GEN(dev, 7, 7); - memset(decls, 0, sizeof(decls)); - num_decls = 0; + buffer_selects = 0; + num_entries = 0; + + if (so_info) { + int buffer_offsets[PIPE_MAX_SO_BUFFERS]; + + memset(buffer_offsets, 0, sizeof(buffer_offsets)); + + for (i = 0; i < so_info->num_outputs; i++) { + unsigned decl, buf, attr, mask; + + buf = so_info->output[i].output_buffer; + + /* pad with holes */ + assert(buffer_offsets[buf] <= so_info->output[i].dst_offset); + while (buffer_offsets[buf] < so_info->output[i].dst_offset) { + int num_dwords; + + num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf]; + if (num_dwords > 4) + num_dwords = 4; + + decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT | + SO_DECL_HOLE_FLAG | + ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT; + + so_decls[num_entries++] = decl; + buffer_offsets[buf] += num_dwords; + } + + /* figure out which attribute is sourced */ + for (attr = 0; attr < sh->out.count; attr++) { + const int idx = sh->out.register_indices[attr]; + if (idx == so_info->output[i].register_index) + break; + } + + decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT; + + if (attr < sh->out.count) { + mask = ((1 << so_info->output[i].num_components) - 1) << + so_info->output[i].start_component; + + /* PSIZE is at W channel */ + if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) { + assert(mask == 0x1); + mask = (mask << 3) & 0xf; + } - cmd_len = 2 * num_decls + 3; + decl |= attr << SO_DECL_REGISTER_INDEX_SHIFT | + mask << SO_DECL_COMPONENT_MASK_SHIFT; + } + else { + assert(!"stream output an undefined register"); + mask = (1 << so_info->output[i].num_components) - 1; + decl |= SO_DECL_HOLE_FLAG | + mask << SO_DECL_COMPONENT_MASK_SHIFT; + } + + so_decls[num_entries++] = decl; + buffer_selects |= 1 << buf; + buffer_offsets[buf] += so_info->output[i].num_components; + } + } + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 201: + * + * "Errata: All 128 decls for all four streams must be included + * whenever this command is issued. The "Num Entries [n]" fields still + * contain the actual numbers of valid decls." + * + * Also note that "DWord Length" has 9 bits for this command, and the type + * of cmd_len is thus uint16_t. + */ + cmd_len = 2 * 128 + 3; ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT | - 0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT | + ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT | 0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT | - 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT); - ilo_cp_write(cp, num_decls << SO_NUM_ENTRIES_0_SHIFT | - 0 << SO_NUM_ENTRIES_1_SHIFT | + 0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT | + buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT); + ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT | 0 << SO_NUM_ENTRIES_2_SHIFT | - 0 << SO_NUM_ENTRIES_3_SHIFT); + 0 << SO_NUM_ENTRIES_1_SHIFT | + num_entries << SO_NUM_ENTRIES_0_SHIFT); - for (i = 0; i < num_decls; i++) { - ilo_cp_write(cp, decls[i]); + for (i = 0; i < num_entries; i++) { + ilo_cp_write(cp, so_decls[i]); + ilo_cp_write(cp, 0); + } + for (; i < 128; i++) { + ilo_cp_write(cp, 0); ilo_cp_write(cp, 0); } @@ -1026,17 +1124,18 @@ gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev, static void gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev, - int index, - bool enable, + int index, int base, int stride, + const struct pipe_stream_output_target *so_target, struct ilo_cp *cp) { const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18); const uint8_t cmd_len = 4; - int start, end; + struct ilo_resource *res; + int end; ILO_GPE_VALID_GEN(dev, 7, 7); - if (!enable) { + if (!so_target || !so_target->buffer) { ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT); @@ -1046,13 +1145,22 @@ gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev, return; } - start = end = 0; + res = ilo_resource(so_target->buffer); + + /* DWord-aligned */ + assert(stride % 4 == 0 && base % 4 == 0); + assert(so_target->buffer_offset % 4 == 0); + + stride &= ~3; + base = (base + so_target->buffer_offset) & ~3; + end = (base + so_target->buffer_size) & ~3; ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT); - ilo_cp_write(cp, start); - ilo_cp_write(cp, end); + ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT | + stride); + ilo_cp_write_bo(cp, base, res->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); + ilo_cp_write_bo(cp, end, res->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); ilo_cp_end(cp); } diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/src/gallium/drivers/ilo/ilo_gpe_gen7.h index d9626e1..118a539 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.h @@ -224,9 +224,9 @@ typedef void typedef void (*ilo_gpe_gen7_3DSTATE_STREAMOUT)(const struct ilo_dev_info *dev, - bool enable, + unsigned buffer_mask, + int vertex_attrib_count, bool rasterizer_discard, - bool flatshade_first, struct ilo_cp *cp); typedef void @@ -366,12 +366,14 @@ typedef void typedef void (*ilo_gpe_gen7_3DSTATE_SO_DECL_LIST)(const struct ilo_dev_info *dev, + const struct pipe_stream_output_info *so_info, + const struct ilo_shader *sh, struct ilo_cp *cp); typedef void (*ilo_gpe_gen7_3DSTATE_SO_BUFFER)(const struct ilo_dev_info *dev, - int index, - bool enable, + int index, int base, int stride, + const struct pipe_stream_output_target *so_target, struct ilo_cp *cp); typedef ilo_gpe_gen6_PIPE_CONTROL ilo_gpe_gen7_PIPE_CONTROL; -- 2.7.4