From 4cef255872e8467aabce52938038a9d2bf27d9b2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 26 Oct 2016 12:46:58 -0700 Subject: [PATCH] vc4: Start using the pack header. This slightly inflates the size of the generated code, in exchange for getting us some convenient tools. before: 4389 0 0 4389 1125 src/gallium/drivers/vc4/.libs/vc4_draw.o 808 0 0 808 328 src/gallium/drivers/vc4/.libs/vc4_emit.o after: 4449 0 0 4449 1161 src/gallium/drivers/vc4/.libs/vc4_draw.o 988 0 0 988 3dc src/gallium/drivers/vc4/.libs/vc4_emit.o --- src/gallium/drivers/vc4/vc4_cl.h | 63 ++++++++++++++++++++++++++++++++++++ src/gallium/drivers/vc4/vc4_draw.c | 49 ++++++++++++++-------------- src/gallium/drivers/vc4/vc4_emit.c | 65 ++++++++++++++++++++++++-------------- src/gallium/drivers/vc4/vc4_job.c | 4 +-- 4 files changed, 130 insertions(+), 51 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h index 74bf8cf..bec177c 100644 --- a/src/gallium/drivers/vc4/vc4_cl.h +++ b/src/gallium/drivers/vc4/vc4_cl.h @@ -40,6 +40,27 @@ struct vc4_job; */ struct vc4_cl_out; +/** A reference to a BO used in the CL packing functions */ +struct vc4_cl_reloc { + struct vc4_bo *bo; + uint32_t offset; +}; + +/* We don't call anything that packs a reloc yet, so don't implement it. 
*/ +static inline void cl_pack_emit_reloc(void *cl, const struct vc4_cl_reloc *reloc) +{ + abort(); +} + +/* We don't use the data arg yet */ +#define __gen_user_data void +#define __gen_address_type struct vc4_cl_reloc +#define __gen_address_offset(reloc) ((reloc)->offset) +#define __gen_emit_reloc cl_pack_emit_reloc + +#include "kernel/vc4_packet.h" +#include "broadcom/cle/v3d_packet_v21_pack.h" + struct vc4_cl { void *base; struct vc4_cl_out *next; @@ -205,4 +226,46 @@ cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl, void cl_ensure_space(struct vc4_cl *cl, uint32_t size); +#define cl_packet_header(packet) V3D21_ ## packet ## _header +#define cl_packet_length(packet) V3D21_ ## packet ## _length +#define cl_packet_pack(packet) V3D21_ ## packet ## _pack +#define cl_packet_struct(packet) V3D21_ ## packet + +static inline void * +cl_get_emit_space(struct vc4_cl_out **cl, size_t size) +{ + void *addr = *cl; + cl_advance(cl, size); + return addr; +} + +/* Macro for setting up an emit of a CL struct. A temporary unpacked struct + * is created, which you get to set fields in of the form: + * + * cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) { + * flags.flat_shading_flags = 1 << 2; + * } + * + * or default values only can be emitted with just: + * + * cl_emit(&bcl, FLAT_SHADE_FLAGS, flags); + * + * The trick here is that we make a for loop that will execute the body + * (either the block or the ';' after the macro invocation) exactly once. + * Also, *_dst is actually of the wrong type, it's the + * uint8_t[cl_packet_length()] in the CL, not a cl_packet_struct(packet). 
+ */ +#define cl_emit(cl_out, packet, name) \ + for (struct cl_packet_struct(packet) name = { \ + cl_packet_header(packet) \ + }, \ + *_dst = cl_get_emit_space(cl_out, cl_packet_length(packet)); \ + __builtin_expect(_dst != NULL, 1); \ + ({ \ + cl_packet_pack(packet)(NULL, (uint8_t *)_dst, &name); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, \ + cl_packet_length(packet))); \ + _dst = NULL; \ + })) \ + #endif /* VC4_CL_H */ diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 0aee73e..4b3fa8a 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -82,32 +82,28 @@ vc4_start_draw(struct vc4_context *vc4) vc4_get_draw_cl_space(job, 0); struct vc4_cl_out *bcl = cl_start(&job->bcl); - // Tile state data is 48 bytes per tile, I think it can be thrown away - // as soon as binning is finished. - cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG); - cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */ - cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */ - cl_u32(&bcl, 0); /* tile state addr, filled by kernel */ - cl_u8(&bcl, job->draw_tiles_x); - cl_u8(&bcl, job->draw_tiles_y); - /* Other flags are filled by kernel. */ - cl_u8(&bcl, job->msaa ? VC4_BIN_CONFIG_MS_MODE_4X : 0); + cl_emit(&bcl, TILE_BINNING_MODE_CONFIGURATION, bin) { + bin.width_in_tiles = job->draw_tiles_x; + bin.height_in_tiles = job->draw_tiles_y; + bin.multisample_mode_4x = job->msaa; + } /* START_TILE_BINNING resets the statechange counters in the hardware, * which are what is used when a primitive is binned to a tile to * figure out what new state packets need to be written to that tile's * command list. */ - cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING); + cl_emit(&bcl, START_TILE_BINNING, start); /* Reset the current compressed primitives format. This gets modified * by VC4_PACKET_GL_INDEXED_PRIMITIVE and * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start * of every tile. 
*/ - cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT); - cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX | - VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES)); + cl_emit(&bcl, PRIMITIVE_LIST_FORMAT, list) { + list.data_type = _16_BIT_INDEX; + list.primitive_type = TRIANGLES_LIST; + } job->needs_flush = true; job->draw_width = vc4->framebuffer.width; @@ -221,13 +217,15 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, cl_end(&job->shader_rec, shader_rec); struct vc4_cl_out *bcl = cl_start(&job->bcl); - /* the actual draw call. */ - cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE); - assert(vtx->num_elements <= 8); - /* Note that number of attributes == 0 in the packet means 8 - * attributes. This field also contains the offset into shader_rec. - */ - cl_u32(&bcl, num_elements_emit & 0x7); + cl_emit(&bcl, GL_SHADER_STATE, shader_state) { + /* Note that number of attributes == 0 in the packet means 8 + * attributes. This field also contains the offset into + * shader_rec. + */ + assert(vtx->num_elements <= 8); + shader_state.number_of_attribute_arrays = + num_elements_emit & 0x7; + } cl_end(&job->bcl, bcl); vc4_write_uniforms(vc4, vc4->prog.fs, @@ -436,10 +434,11 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } } - cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE); - cl_u8(&bcl, info->mode); - cl_u32(&bcl, this_count); - cl_u32(&bcl, start); + cl_emit(&bcl, VERTEX_ARRAY_PRIMITIVES, array) { + array.primitive_mode = info->mode; + array.length = this_count; + array.index_of_first_vertex = start; + } job->draw_calls_queued++; count -= step; diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c index b48d89a..9fc266e 100644 --- a/src/gallium/drivers/vc4/vc4_emit.c +++ b/src/gallium/drivers/vc4/vc4_emit.c @@ -60,11 +60,12 @@ vc4_emit_state(struct pipe_context *pctx) maxy = MIN2(vp_maxy, vc4->scissor.maxy); } - cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW); - cl_u16(&bcl, minx); - cl_u16(&bcl, miny); - cl_u16(&bcl, maxx - minx); - cl_u16(&bcl, 
maxy - miny); + cl_emit(&bcl, CLIP_WINDOW, clip) { + clip.clip_window_left_pixel_coordinate = minx; + clip.clip_window_bottom_pixel_coordinate = miny; + clip.clip_window_height_in_pixels = maxy - miny; + clip.clip_window_width_in_pixels = maxx - minx; + } job->draw_min_x = MIN2(job->draw_min_x, minx); job->draw_min_y = MIN2(job->draw_min_y, miny); @@ -113,35 +114,51 @@ vc4_emit_state(struct pipe_context *pctx) } if (vc4->dirty & VC4_DIRTY_RASTERIZER) { - cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET); - cl_u16(&bcl, vc4->rasterizer->offset_factor); - cl_u16(&bcl, vc4->rasterizer->offset_units); + cl_emit(&bcl, DEPTH_OFFSET, depth) { + depth.depth_offset_units = + vc4->rasterizer->offset_units; + depth.depth_offset_factor = + vc4->rasterizer->offset_factor; + } - cl_u8(&bcl, VC4_PACKET_POINT_SIZE); - cl_f(&bcl, vc4->rasterizer->point_size); + cl_emit(&bcl, POINT_SIZE, points) { + points.point_size = vc4->rasterizer->point_size; + } - cl_u8(&bcl, VC4_PACKET_LINE_WIDTH); - cl_f(&bcl, vc4->rasterizer->base.line_width); + cl_emit(&bcl, LINE_WIDTH, points) { + points.line_width = vc4->rasterizer->base.line_width; + } } if (vc4->dirty & VC4_DIRTY_VIEWPORT) { - cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING); - cl_f(&bcl, vc4->viewport.scale[0] * 16.0f); - cl_f(&bcl, vc4->viewport.scale[1] * 16.0f); + cl_emit(&bcl, CLIPPER_XY_SCALING, clip) { + clip.viewport_half_width_in_1_16th_of_pixel = + vc4->viewport.scale[0] * 16.0f; + clip.viewport_half_height_in_1_16th_of_pixel = + vc4->viewport.scale[1] * 16.0f; + } - cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING); - cl_f(&bcl, vc4->viewport.translate[2]); - cl_f(&bcl, vc4->viewport.scale[2]); + cl_emit(&bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { + clip.viewport_z_offset_zc_to_zs = + vc4->viewport.translate[2]; + clip.viewport_z_scale_zc_to_zs = + vc4->viewport.scale[2]; + } - cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET); - cl_u16(&bcl, 16 * vc4->viewport.translate[0]); - cl_u16(&bcl, 16 * vc4->viewport.translate[1]); + cl_emit(&bcl, VIEWPORT_OFFSET, vp) { 
+ vp.viewport_centre_x_coordinate = + 16 * vc4->viewport.translate[0]; + vp.viewport_centre_y_coordinate = + 16 * vc4->viewport.translate[1]; + } } if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) { - cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS); - cl_u32(&bcl, vc4->rasterizer->base.flatshade ? - vc4->prog.fs->color_inputs : 0); + cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) { + if (vc4->rasterizer->base.flatshade) + flags.flat_shading_flags = + vc4->prog.fs->color_inputs; + } } cl_end(&job->bcl, bcl); diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index d39472e..afdac8c 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -378,11 +378,11 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job) */ cl_ensure_space(&job->bcl, 8); struct vc4_cl_out *bcl = cl_start(&job->bcl); - cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE); + cl_emit(&bcl, INCREMENT_SEMAPHORE, incr); /* The FLUSH caps all of our bin lists with a * VC4_PACKET_RETURN. */ - cl_u8(&bcl, VC4_PACKET_FLUSH); + cl_emit(&bcl, FLUSH, flush); cl_end(&job->bcl, bcl); } struct drm_vc4_submit_cl submit = { -- 2.7.4