clang-format -fallback-style=none --style=file -i src/gallium/drivers/freedreno/*.[ch] src/gallium/drivers/freedreno/*/*.[ch]
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8883>
#include "pipe/p_state.h"
#include "util/u_blend.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
#include "fd2_blend.h"
#include "fd2_context.h"
#include "fd2_util.h"
-
static enum a2xx_rb_blend_opcode
blend_func(unsigned func)
{
- switch (func) {
- case PIPE_BLEND_ADD:
- return BLEND2_DST_PLUS_SRC;
- case PIPE_BLEND_MIN:
- return BLEND2_MIN_DST_SRC;
- case PIPE_BLEND_MAX:
- return BLEND2_MAX_DST_SRC;
- case PIPE_BLEND_SUBTRACT:
- return BLEND2_SRC_MINUS_DST;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return BLEND2_DST_MINUS_SRC;
- default:
- DBG("invalid blend func: %x", func);
- return 0;
- }
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return BLEND2_DST_PLUS_SRC;
+ case PIPE_BLEND_MIN:
+ return BLEND2_MIN_DST_SRC;
+ case PIPE_BLEND_MAX:
+ return BLEND2_MAX_DST_SRC;
+ case PIPE_BLEND_SUBTRACT:
+ return BLEND2_SRC_MINUS_DST;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return BLEND2_DST_MINUS_SRC;
+ default:
+ DBG("invalid blend func: %x", func);
+ return 0;
+ }
}
void *
fd2_blend_state_create(struct pipe_context *pctx,
- const struct pipe_blend_state *cso)
+ const struct pipe_blend_state *cso)
{
- const struct pipe_rt_blend_state *rt = &cso->rt[0];
- struct fd2_blend_stateobj *so;
- unsigned rop = PIPE_LOGICOP_COPY;
-
- if (cso->logicop_enable)
- rop = cso->logicop_func; /* 1:1 mapping with hw */
-
- if (cso->independent_blend_enable) {
- DBG("Unsupported! independent blend state");
- return NULL;
- }
-
- so = CALLOC_STRUCT(fd2_blend_stateobj);
- if (!so)
- return NULL;
-
- so->base = *cso;
-
- so->rb_colorcontrol = A2XX_RB_COLORCONTROL_ROP_CODE(rop);
-
- so->rb_blendcontrol =
- A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(fd_blend_factor(rt->rgb_src_factor)) |
- A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(blend_func(rt->rgb_func)) |
- A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(fd_blend_factor(rt->rgb_dst_factor));
-
- /* hardware doesn't support SRC_ALPHA_SATURATE for alpha, but it is equivalent to ONE */
- unsigned alpha_src_factor = rt->alpha_src_factor;
- if (alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
- alpha_src_factor = PIPE_BLENDFACTOR_ONE;
-
- so->rb_blendcontrol |=
- A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(fd_blend_factor(alpha_src_factor)) |
- A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(blend_func(rt->alpha_func)) |
- A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(fd_blend_factor(rt->alpha_dst_factor));
-
- if (rt->colormask & PIPE_MASK_R)
- so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_RED;
- if (rt->colormask & PIPE_MASK_G)
- so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_GREEN;
- if (rt->colormask & PIPE_MASK_B)
- so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_BLUE;
- if (rt->colormask & PIPE_MASK_A)
- so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_ALPHA;
-
- if (!rt->blend_enable)
- so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_BLEND_DISABLE;
-
- if (cso->dither)
- so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_ALWAYS);
-
- return so;
+ const struct pipe_rt_blend_state *rt = &cso->rt[0];
+ struct fd2_blend_stateobj *so;
+ unsigned rop = PIPE_LOGICOP_COPY;
+
+ if (cso->logicop_enable)
+ rop = cso->logicop_func; /* 1:1 mapping with hw */
+
+ if (cso->independent_blend_enable) {
+ DBG("Unsupported! independent blend state");
+ return NULL;
+ }
+
+ so = CALLOC_STRUCT(fd2_blend_stateobj);
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ so->rb_colorcontrol = A2XX_RB_COLORCONTROL_ROP_CODE(rop);
+
+ so->rb_blendcontrol =
+ A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(
+ fd_blend_factor(rt->rgb_src_factor)) |
+ A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(blend_func(rt->rgb_func)) |
+ A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(
+ fd_blend_factor(rt->rgb_dst_factor));
+
+ /* hardware doesn't support SRC_ALPHA_SATURATE for alpha, but it is
+ * equivalent to ONE */
+ unsigned alpha_src_factor = rt->alpha_src_factor;
+ if (alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
+ alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+
+ so->rb_blendcontrol |=
+ A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(fd_blend_factor(alpha_src_factor)) |
+ A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(blend_func(rt->alpha_func)) |
+ A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(
+ fd_blend_factor(rt->alpha_dst_factor));
+
+ if (rt->colormask & PIPE_MASK_R)
+ so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_RED;
+ if (rt->colormask & PIPE_MASK_G)
+ so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_GREEN;
+ if (rt->colormask & PIPE_MASK_B)
+ so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_BLUE;
+ if (rt->colormask & PIPE_MASK_A)
+ so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_ALPHA;
+
+ if (!rt->blend_enable)
+ so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_BLEND_DISABLE;
+
+ if (cso->dither)
+ so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_ALWAYS);
+
+ return so;
}
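
/* Aside (editor's sketch, not driver code): why the SRC_ALPHA_SATURATE ->
 * ONE substitution above is lossless for the alpha channel. In GL the
 * blend factor is (f, f, f, 1) with f = min(As, 1 - Ad) -- the alpha
 * component is defined to be exactly 1.0, which is what ONE gives.
 */
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

static float
src_alpha_saturate(float src_a, float dst_a, bool is_alpha_channel)
{
   /* rgb channels get f = min(As, 1 - Ad); alpha always gets 1.0 */
   return is_alpha_channel ? 1.0f : fminf(src_a, 1.0f - dst_a);
}

int
main(void)
{
   printf("rgb factor:   %f\n", src_alpha_saturate(0.25f, 0.5f, false)); /* 0.25 */
   printf("alpha factor: %f\n", src_alpha_saturate(0.25f, 0.5f, true));  /* 1.00 */
   return 0;
}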
#ifndef FD2_BLEND_H_
#define FD2_BLEND_H_
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
struct fd2_blend_stateobj {
- struct pipe_blend_state base;
- uint32_t rb_blendcontrol;
- uint32_t rb_colorcontrol; /* must be OR'd w/ zsa->rb_colorcontrol */
- uint32_t rb_colormask;
+ struct pipe_blend_state base;
+ uint32_t rb_blendcontrol;
+ uint32_t rb_colorcontrol; /* must be OR'd w/ zsa->rb_colorcontrol */
+ uint32_t rb_colormask;
};
static inline struct fd2_blend_stateobj *
fd2_blend_stateobj(struct pipe_blend_state *blend)
{
- return (struct fd2_blend_stateobj *)blend;
+ return (struct fd2_blend_stateobj *)blend;
}
-void * fd2_blend_state_create(struct pipe_context *pctx,
- const struct pipe_blend_state *cso);
+void *fd2_blend_state_create(struct pipe_context *pctx,
+ const struct pipe_blend_state *cso);
#endif /* FD2_BLEND_H_ */
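
/* Aside (editor's sketch, not driver code): the state-object subclassing
 * idiom used by fd2_blend_stateobj above. Because `base` is the first
 * member, a pointer to the driver object is also a valid pointer to the
 * gallium CSO, so the cast in fd2_blend_stateobj() is well defined. The
 * struct names below are hypothetical.
 */
#include <stdio.h>

struct base_state {
   int id;
};

struct driver_state {
   struct base_state base; /* must be the first member */
   int extra;
};

static struct driver_state *
driver_state(struct base_state *b)
{
   return (struct driver_state *)b; /* legal: base is at offset 0 */
}

int
main(void)
{
   struct driver_state so = {.base = {.id = 1}, .extra = 42};
   struct base_state *cso = &so.base; /* what gallium would hand around */
   printf("extra=%d\n", driver_state(cso)->extra);
   return 0;
}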
* Rob Clark <robclark@freedesktop.org>
*/
-
#include "fd2_context.h"
#include "fd2_blend.h"
#include "fd2_draw.h"
#include "fd2_zsa.h"
static void
-fd2_context_destroy(struct pipe_context *pctx)
- in_dt
+fd2_context_destroy(struct pipe_context *pctx) in_dt
{
- fd_context_destroy(pctx);
- free(pctx);
+ fd_context_destroy(pctx);
+ free(pctx);
}
static struct pipe_resource *
};
/* clang-format on */
- struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
- PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
- pipe_buffer_write(pctx, prsc, 0,
- sizeof(init_shader_const), init_shader_const);
- return prsc;
+ struct pipe_resource *prsc =
+ pipe_buffer_create(pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
+ sizeof(init_shader_const));
+ pipe_buffer_write(pctx, prsc, 0, sizeof(init_shader_const),
+ init_shader_const);
+ return prsc;
}
/* clang-format off */
struct pipe_context *
fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
{
- struct fd_screen *screen = fd_screen(pscreen);
- struct fd2_context *fd2_ctx = CALLOC_STRUCT(fd2_context);
- struct pipe_context *pctx;
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd2_context *fd2_ctx = CALLOC_STRUCT(fd2_context);
+ struct pipe_context *pctx;
- if (!fd2_ctx)
- return NULL;
+ if (!fd2_ctx)
+ return NULL;
- pctx = &fd2_ctx->base.base;
- pctx->screen = pscreen;
+ pctx = &fd2_ctx->base.base;
+ pctx->screen = pscreen;
- fd2_ctx->base.dev = fd_device_ref(screen->dev);
- fd2_ctx->base.screen = fd_screen(pscreen);
+ fd2_ctx->base.dev = fd_device_ref(screen->dev);
+ fd2_ctx->base.screen = fd_screen(pscreen);
- pctx->destroy = fd2_context_destroy;
- pctx->create_blend_state = fd2_blend_state_create;
- pctx->create_rasterizer_state = fd2_rasterizer_state_create;
- pctx->create_depth_stencil_alpha_state = fd2_zsa_state_create;
+ pctx->destroy = fd2_context_destroy;
+ pctx->create_blend_state = fd2_blend_state_create;
+ pctx->create_rasterizer_state = fd2_rasterizer_state_create;
+ pctx->create_depth_stencil_alpha_state = fd2_zsa_state_create;
- fd2_draw_init(pctx);
- fd2_gmem_init(pctx);
- fd2_texture_init(pctx);
- fd2_prog_init(pctx);
- fd2_emit_init(pctx);
+ fd2_draw_init(pctx);
+ fd2_gmem_init(pctx);
+ fd2_texture_init(pctx);
+ fd2_prog_init(pctx);
+ fd2_emit_init(pctx);
- pctx = fd_context_init(&fd2_ctx->base, pscreen,
- (screen->gpu_id >= 220) ? a22x_primtypes : a20x_primtypes,
- priv, flags);
- if (!pctx)
- return NULL;
+ pctx = fd_context_init(
+ &fd2_ctx->base, pscreen,
+ (screen->gpu_id >= 220) ? a22x_primtypes : a20x_primtypes, priv, flags);
+ if (!pctx)
+ return NULL;
- /* construct vertex state used for solid ops (clear, and gmem<->mem) */
- fd2_ctx->solid_vertexbuf = create_solid_vertexbuf(pctx);
+ /* construct vertex state used for solid ops (clear, and gmem<->mem) */
+ fd2_ctx->solid_vertexbuf = create_solid_vertexbuf(pctx);
- fd2_query_context_init(pctx);
+ fd2_query_context_init(pctx);
- return pctx;
+ return pctx;
}
#include "freedreno_context.h"
struct fd2_context {
- struct fd_context base;
+ struct fd_context base;
- /* vertex buf used for clear/gmem->mem vertices, and mem->gmem
- * vertices and tex coords:
- */
- struct pipe_resource *solid_vertexbuf;
+ /* vertex buf used for clear/gmem->mem vertices, and mem->gmem
+ * vertices and tex coords:
+ */
+ struct pipe_resource *solid_vertexbuf;
};
static inline struct fd2_context *
fd2_context(struct fd_context *ctx)
{
- return (struct fd2_context *)ctx;
+ return (struct fd2_context *)ctx;
}
-struct pipe_context *
-fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
+struct pipe_context *fd2_context_create(struct pipe_screen *pscreen, void *priv,
+ unsigned flags);
#endif /* FD2_CONTEXT_H_ */
*/
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
+#include "util/u_string.h"
-#include "freedreno_state.h"
#include "freedreno_resource.h"
+#include "freedreno_state.h"
-#include "fd2_draw.h"
#include "fd2_context.h"
+#include "fd2_draw.h"
#include "fd2_emit.h"
#include "fd2_program.h"
#include "fd2_util.h"
#include "fd2_zsa.h"
-
static void
emit_cacheflush(struct fd_ringbuffer *ring)
{
- unsigned i;
+ unsigned i;
- for (i = 0; i < 12; i++) {
- OUT_PKT3(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, CACHE_FLUSH);
- }
+ for (i = 0; i < 12; i++) {
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, CACHE_FLUSH);
+ }
}
static void
-emit_vertexbufs(struct fd_context *ctx)
- assert_dt
+emit_vertexbufs(struct fd_context *ctx) assert_dt
{
- struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
- struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
- struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
- unsigned i;
-
- if (!vtx->num_elements)
- return;
-
- for (i = 0; i < vtx->num_elements; i++) {
- struct pipe_vertex_element *elem = &vtx->pipe[i];
- struct pipe_vertex_buffer *vb =
- &vertexbuf->vb[elem->vertex_buffer_index];
- bufs[i].offset = vb->buffer_offset;
- bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo);
- bufs[i].prsc = vb->buffer.resource;
- }
-
- // NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
- // CONST(20,0) (or CONST(26,0) in soliv_vp)
-
- fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
- fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements);
+ struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
+ struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
+ struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
+ unsigned i;
+
+ if (!vtx->num_elements)
+ return;
+
+ for (i = 0; i < vtx->num_elements; i++) {
+ struct pipe_vertex_element *elem = &vtx->pipe[i];
+ struct pipe_vertex_buffer *vb = &vertexbuf->vb[elem->vertex_buffer_index];
+ bufs[i].offset = vb->buffer_offset;
+ bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo);
+ bufs[i].prsc = vb->buffer.resource;
+ }
+
+ // NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
+   // CONST(20,0) (or CONST(26,0) in solid_vp)
+
+ fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
+ fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements);
}
static void
draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
- const struct pipe_draw_start_count *draw,
- struct fd_ringbuffer *ring, unsigned index_offset, bool binning)
- assert_dt
+ const struct pipe_draw_start_count *draw, struct fd_ringbuffer *ring,
+ unsigned index_offset, bool binning) assert_dt
{
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
- OUT_RING(ring, info->index_size ? 0 : draw->start);
-
- OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
- OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
-
- if (is_a20x(ctx->screen)) {
- /* wait for DMA to finish and
- * dummy draw one triangle with indexes 0,0,0.
- * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.
- *
- * this workaround is for a HW bug related to DMA alignment:
- * it is necessary for indexed draws and possibly also
- * draws that read binning data
- */
- OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
- OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */
- OUT_RING(ring, 0x00000001);
-
- OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x0003c004);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000003);
- OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0, 0);
- OUT_RING(ring, 0x00000006);
- } else {
- OUT_WFI (ring);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
- OUT_RING(ring, info->index_bounds_valid ? info->max_index : ~0); /* VGT_MAX_VTX_INDX */
- OUT_RING(ring, info->index_bounds_valid ? info->min_index : 0); /* VGT_MIN_VTX_INDX */
- }
-
- /* binning shader will take offset from C64 */
- if (binning && is_a20x(ctx->screen)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, 0x00000180);
- OUT_RING(ring, fui(ctx->batch->num_vertices));
- OUT_RING(ring, fui(0.0f));
- OUT_RING(ring, fui(0.0f));
- OUT_RING(ring, fui(0.0f));
- }
-
- enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;
- if (binning || info->mode == PIPE_PRIM_POINTS)
- vismode = IGNORE_VISIBILITY;
-
- fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode],
- vismode, info, draw, index_offset);
-
- if (is_a20x(ctx->screen)) {
- /* not sure why this is required, but it fixes some hangs */
- OUT_WFI(ring);
- } else {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
- OUT_RING(ring, 0x00000000);
- }
-
- emit_cacheflush(ring);
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
+ OUT_RING(ring, info->index_size ? 0 : draw->start);
+
+ OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
+ OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
+
+ if (is_a20x(ctx->screen)) {
+ /* wait for DMA to finish and
+ * dummy draw one triangle with indexes 0,0,0.
+ * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.
+ *
+ * this workaround is for a HW bug related to DMA alignment:
+ * it is necessary for indexed draws and possibly also
+ * draws that read binning data
+ */
+ OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
+ OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */
+ OUT_RING(ring, 0x00000001);
+
+ OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x0003c004);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000003);
+ OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0,
+ 0);
+ OUT_RING(ring, 0x00000006);
+ } else {
+ OUT_WFI(ring);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
+ OUT_RING(ring, info->index_bounds_valid ? info->max_index
+ : ~0); /* VGT_MAX_VTX_INDX */
+ OUT_RING(ring, info->index_bounds_valid ? info->min_index
+ : 0); /* VGT_MIN_VTX_INDX */
+ }
+
+ /* binning shader will take offset from C64 */
+ if (binning && is_a20x(ctx->screen)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, 0x00000180);
+ OUT_RING(ring, fui(ctx->batch->num_vertices));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(0.0f));
+ }
+
+ enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;
+ if (binning || info->mode == PIPE_PRIM_POINTS)
+ vismode = IGNORE_VISIBILITY;
+
+ fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode], vismode, info,
+ draw, index_offset);
+
+ if (is_a20x(ctx->screen)) {
+ /* not sure why this is required, but it fixes some hangs */
+ OUT_WFI(ring);
+ } else {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
+ OUT_RING(ring, 0x00000000);
+ }
+
+ emit_cacheflush(ring);
}
-
static bool
fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
- const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count *pdraw,
- unsigned index_offset)
- assert_dt
+ const struct pipe_draw_indirect_info *indirect,
+ const struct pipe_draw_start_count *pdraw,
+ unsigned index_offset) assert_dt
{
- if (!ctx->prog.fs || !ctx->prog.vs)
- return false;
+ if (!ctx->prog.fs || !ctx->prog.vs)
+ return false;
- if (pinfo->mode != PIPE_PRIM_MAX &&
- !indirect &&
- !pinfo->primitive_restart &&
- !u_trim_pipe_prim(pinfo->mode, (unsigned*)&pdraw->count))
- return false;
+ if (pinfo->mode != PIPE_PRIM_MAX && !indirect && !pinfo->primitive_restart &&
+ !u_trim_pipe_prim(pinfo->mode, (unsigned *)&pdraw->count))
+ return false;
- if (ctx->dirty & FD_DIRTY_VTXBUF)
- emit_vertexbufs(ctx);
+ if (ctx->dirty & FD_DIRTY_VTXBUF)
+ emit_vertexbufs(ctx);
- if (fd_binning_enabled)
- fd2_emit_state_binning(ctx, ctx->dirty);
+ if (fd_binning_enabled)
+ fd2_emit_state_binning(ctx, ctx->dirty);
- fd2_emit_state(ctx, ctx->dirty);
+ fd2_emit_state(ctx, ctx->dirty);
- /* a2xx can draw only 65535 vertices at once
- * on a22x the field in the draw command is 32bits but seems limited too
- * using a limit of 32k because it fixes an unexplained hang
- * 32766 works for all primitives (multiple of 2 and 3)
- */
- if (pdraw->count > 32766) {
+ /* a2xx can draw only 65535 vertices at once
+ * on a22x the field in the draw command is 32bits but seems limited too
+ * using a limit of 32k because it fixes an unexplained hang
+ * 32766 works for all primitives (multiple of 2 and 3)
+ */
+ if (pdraw->count > 32766) {
/* clang-format off */
static const uint16_t step_tbl[PIPE_PRIM_MAX] = {
[0 ... PIPE_PRIM_MAX - 1] = 32766,
};
/* clang-format on */
- struct pipe_draw_start_count draw = *pdraw;
- unsigned count = draw.count;
- unsigned step = step_tbl[pinfo->mode];
- unsigned num_vertices = ctx->batch->num_vertices;
-
- if (!step)
- return false;
-
- for (; count + step > 32766; count -= step) {
- draw.count = MIN2(count, 32766);
- draw_impl(ctx, pinfo, &draw, ctx->batch->draw, index_offset, false);
- draw_impl(ctx, pinfo, &draw, ctx->batch->binning, index_offset, true);
- draw.start += step;
- ctx->batch->num_vertices += step;
- }
- /* changing this value is a hack, restore it */
- ctx->batch->num_vertices = num_vertices;
- } else {
- draw_impl(ctx, pinfo, pdraw, ctx->batch->draw, index_offset, false);
- draw_impl(ctx, pinfo, pdraw, ctx->batch->binning, index_offset, true);
- }
-
- fd_context_all_clean(ctx);
-
- return true;
+ struct pipe_draw_start_count draw = *pdraw;
+ unsigned count = draw.count;
+ unsigned step = step_tbl[pinfo->mode];
+ unsigned num_vertices = ctx->batch->num_vertices;
+
+ if (!step)
+ return false;
+
+ for (; count + step > 32766; count -= step) {
+ draw.count = MIN2(count, 32766);
+ draw_impl(ctx, pinfo, &draw, ctx->batch->draw, index_offset, false);
+ draw_impl(ctx, pinfo, &draw, ctx->batch->binning, index_offset, true);
+ draw.start += step;
+ ctx->batch->num_vertices += step;
+ }
+ /* changing this value is a hack, restore it */
+ ctx->batch->num_vertices = num_vertices;
+ } else {
+ draw_impl(ctx, pinfo, pdraw, ctx->batch->draw, index_offset, false);
+ draw_impl(ctx, pinfo, pdraw, ctx->batch->binning, index_offset, true);
+ }
+
+ fd_context_all_clean(ctx);
+
+ return true;
}
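
/* Aside (editor's sketch, not driver code): the >32766-vertex split above
 * in isolation. 32766 is used because it is a multiple of both 2 and 3,
 * so line and triangle lists are never cut mid-primitive.
 */
#include <stdio.h>

static void
split_draw(unsigned start, unsigned count)
{
   const unsigned step = 32766;

   while (count) {
      unsigned n = (count < step) ? count : step;
      printf("emit draw: start=%u count=%u\n", start, n);
      start += n;
      count -= n;
   }
}

int
main(void)
{
   split_draw(0, 100000); /* -> chunks of 32766, 32766, 32766, 1702 */
   return 0;
}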
static void
clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring,
- unsigned buffers, bool fast_clear)
- assert_dt
+ unsigned buffers, bool fast_clear) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct fd2_context *fd2_ctx = fd2_context(ctx);
- uint32_t reg;
-
- fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
- { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
- }, 1);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
- OUT_RING(ring, 0);
-
- fd2_program_emit(ctx, ring, &ctx->solid_prog);
-
- OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
- OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
-
- if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
- reg = 0;
- if (buffers & PIPE_CLEAR_DEPTH) {
- reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
- A2XX_RB_DEPTHCONTROL_Z_ENABLE |
- A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
- A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
- }
- if (buffers & PIPE_CLEAR_STENCIL) {
- reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
- A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
- A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
- }
- OUT_RING(ring, reg);
- }
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
- OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
- A2XX_RB_COLORCONTROL_BLEND_DISABLE |
- A2XX_RB_COLORCONTROL_ROP_CODE(12) |
- A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
- A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
- OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */
- OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
- A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
- A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) |
- (fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0));
-
- if (fast_clear) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
- OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3));
- }
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
- OUT_RING(ring, 0x0000ffff);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
- if (buffers & PIPE_CLEAR_COLOR) {
- OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
- A2XX_RB_COLOR_MASK_WRITE_GREEN |
- A2XX_RB_COLOR_MASK_WRITE_BLUE |
- A2XX_RB_COLOR_MASK_WRITE_ALPHA);
- } else {
- OUT_RING(ring, 0x0);
- }
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
- OUT_RING(ring, 0);
-
- if (is_a20x(batch->ctx->screen))
- return;
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
- OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
- OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
- OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
- OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
- OUT_RING(ring, 0x00000084);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
- OUT_RING(ring, 0x0000028f);
+ struct fd_context *ctx = batch->ctx;
+ struct fd2_context *fd2_ctx = fd2_context(ctx);
+ uint32_t reg;
+
+ fd2_emit_vertex_bufs(ring, 0x9c,
+ (struct fd2_vertex_buf[]){
+ {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
+ },
+ 1);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
+ OUT_RING(ring, 0);
+
+ fd2_program_emit(ctx, ring, &ctx->solid_prog);
+
+ OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
+ OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
+
+ if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
+ reg = 0;
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
+ A2XX_RB_DEPTHCONTROL_Z_ENABLE |
+ A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
+ A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
+ }
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
+ A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
+ A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
+ }
+ OUT_RING(ring, reg);
+ }
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
+ OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
+ A2XX_RB_COLORCONTROL_BLEND_DISABLE |
+ A2XX_RB_COLORCONTROL_ROP_CODE(12) |
+ A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
+ A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
+ OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */
+ OUT_RING(
+ ring,
+ A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
+ A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ (fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0));
+
+ if (fast_clear) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
+ OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3));
+ }
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
+ OUT_RING(ring, 0x0000ffff);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
+ if (buffers & PIPE_CLEAR_COLOR) {
+ OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
+ A2XX_RB_COLOR_MASK_WRITE_GREEN |
+ A2XX_RB_COLOR_MASK_WRITE_BLUE |
+ A2XX_RB_COLOR_MASK_WRITE_ALPHA);
+ } else {
+ OUT_RING(ring, 0x0);
+ }
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
+ OUT_RING(ring, 0);
+
+ if (is_a20x(batch->ctx->screen))
+ return;
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
+ OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
+ OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
+ OUT_RING(ring,
+ 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
+ OUT_RING(ring, 0x00000084);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0x0000028f);
}
static void
clear_state_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
- if (is_a20x(ctx->screen))
- return;
+ if (is_a20x(ctx->screen))
+ return;
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
- OUT_RING(ring, 0x00000000);
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
+ OUT_RING(ring, 0x00000000);
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
- OUT_RING(ring, 0x00000000);
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
+ OUT_RING(ring, 0x00000000);
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
- OUT_RING(ring, 0x0000003b);
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0x0000003b);
}
static void
clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring,
- uint32_t color_clear, uint32_t depth_clear, unsigned patch_type)
+ uint32_t color_clear, uint32_t depth_clear, unsigned patch_type)
{
- BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */
-
- /* zero values are patched in */
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
- OUT_RINGP(ring, patch_type, &batch->gmem_patches);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 4);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
- OUT_RING(ring, 0x8000 | 32);
- OUT_RING(ring, 0);
- OUT_RING(ring, 0);
-
- /* set fill values */
- if (!is_a20x(batch->ctx->screen)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
- OUT_RING(ring, color_clear);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
- OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
- A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
- OUT_RING(ring, depth_clear);
- } else {
- const float sc = 1.0f / 255.0f;
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, 0x00000480);
- OUT_RING(ring, fui((float) (color_clear >> 0 & 0xff) * sc));
- OUT_RING(ring, fui((float) (color_clear >> 8 & 0xff) * sc));
- OUT_RING(ring, fui((float) (color_clear >> 16 & 0xff) * sc));
- OUT_RING(ring, fui((float) (color_clear >> 24 & 0xff) * sc));
-
- // XXX if using float the rounding error breaks it..
- float depth = ((double) (depth_clear >> 8)) * (1.0/(double) 0xffffff);
- assert((unsigned) (((double) depth * (double) 0xffffff)) ==
- (depth_clear >> 8));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
- OUT_RING(ring, fui(0.0f));
- OUT_RING(ring, fui(depth));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
- OUT_RING(ring, 0xff000000 |
- A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) |
- A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
- OUT_RING(ring, 0xff000000 |
- A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) |
- A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
- }
-
- fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
- DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
+ BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */
+
+ /* zero values are patched in */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
+ OUT_RINGP(ring, patch_type, &batch->gmem_patches);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 4);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
+ OUT_RING(ring, 0x8000 | 32);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+
+ /* set fill values */
+ if (!is_a20x(batch->ctx->screen)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
+ OUT_RING(ring, color_clear);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
+ OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
+ A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
+ OUT_RING(ring, depth_clear);
+ } else {
+ const float sc = 1.0f / 255.0f;
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, 0x00000480);
+ OUT_RING(ring, fui((float)(color_clear >> 0 & 0xff) * sc));
+ OUT_RING(ring, fui((float)(color_clear >> 8 & 0xff) * sc));
+ OUT_RING(ring, fui((float)(color_clear >> 16 & 0xff) * sc));
+ OUT_RING(ring, fui((float)(color_clear >> 24 & 0xff) * sc));
+
+ // XXX if using float the rounding error breaks it..
+ float depth = ((double)(depth_clear >> 8)) * (1.0 / (double)0xffffff);
+ assert((unsigned)(((double)depth * (double)0xffffff)) ==
+ (depth_clear >> 8));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(depth));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
+ OUT_RING(ring,
+ 0xff000000 |
+ A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) |
+ A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, 0xff000000 |
+ A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) |
+ A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+ }
+
+ fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
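
/* Aside (editor's sketch, not driver code): the XXX comment above. float
 * has only a 24-bit mantissa, so dividing a 24-bit depth value by 0xffffff
 * and multiplying back in float can truncate to one less than the input;
 * the double round-trip is expected to survive (the driver asserts it).
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   uint32_t d = 0x123456; /* arbitrary 24-bit depth value */
   float f = (float)d / (float)0xffffff;
   double g = (double)d / (double)0xffffff;

   printf("in:     0x%06x\n", d);
   printf("float:  0x%06x\n", (uint32_t)((double)f * 0xffffff)); /* may be off by one */
   printf("double: 0x%06x\n", (uint32_t)(g * 0xffffff));
   return 0;
}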
static bool
fd2_clear_fast(struct fd_context *ctx, unsigned buffers,
- const union pipe_color_union *color, double depth, unsigned stencil)
- assert_dt
+ const union pipe_color_union *color, double depth,
+ unsigned stencil) assert_dt
{
- /* using 4x MSAA allows clearing ~2x faster
- * then we can use higher bpp clearing to clear lower bpp
- * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8)
- * note: its possible to clear with 32_32_32_32 format but its not faster
- * note: fast clear doesn't work with sysmem rendering
- * (sysmem rendering is disabled when clear is used)
- *
- * we only have 16-bit / 32-bit color formats
- * and 16-bit / 32-bit depth formats
- * so there are only a few possible combinations
- *
- * if the bpp of the color/depth doesn't match
- * we clear with depth/color individually
- */
- struct fd2_context *fd2_ctx = fd2_context(ctx);
- struct fd_batch *batch = ctx->batch;
- struct fd_ringbuffer *ring = batch->draw;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- uint32_t color_clear = 0, depth_clear = 0;
- enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
- int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */
- int color_size = -1;
-
- /* TODO: need to test performance on a22x */
- if (!is_a20x(ctx->screen))
- return false;
-
- if (buffers & PIPE_CLEAR_COLOR)
- color_size = util_format_get_blocksizebits(format) == 32;
-
- if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
- /* no fast clear when clearing only one component of depth+stencil buffer */
- if (!(buffers & PIPE_CLEAR_DEPTH))
- return false;
-
- if ((pfb->zsbuf->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
- pfb->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
- !(buffers & PIPE_CLEAR_STENCIL))
- return false;
-
- depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8;
- }
-
- assert(color_size >= 0 || depth_size >= 0);
-
- if (color_size == 0) {
- color_clear = pack_rgba(format, color->f);
- color_clear = (color_clear << 16) | (color_clear & 0xffff);
- } else if (color_size == 1) {
- color_clear = pack_rgba(format, color->f);
- }
-
- if (depth_size == 0) {
- depth_clear = (uint32_t)(0xffff * depth);
- depth_clear |= depth_clear << 16;
- } else if (depth_size == 1) {
- depth_clear = (((uint32_t)(0xffffff * depth)) << 8);
- depth_clear |= (stencil & 0xff);
- }
-
- /* disable "window" scissor.. */
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
- OUT_RING(ring, xy2d(0, 0));
- OUT_RING(ring, xy2d(0x7fff, 0x7fff));
-
- /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
- OUT_RING(ring, fui(4096.0));
- OUT_RING(ring, fui(4096.0));
- OUT_RING(ring, fui(4096.0));
- OUT_RING(ring, fui(4096.0));
-
- clear_state(batch, ring, ~0u, true);
-
- if (color_size >= 0 && depth_size != color_size)
- clear_fast(batch, ring, color_clear, color_clear, GMEM_PATCH_FASTCLEAR_COLOR);
-
- if (depth_size >= 0 && depth_size != color_size)
- clear_fast(batch, ring, depth_clear, depth_clear, GMEM_PATCH_FASTCLEAR_DEPTH);
-
- if (depth_size == color_size)
- clear_fast(batch, ring, color_clear, depth_clear, GMEM_PATCH_FASTCLEAR_COLOR_DEPTH);
-
- clear_state_restore(ctx, ring);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
- OUT_RING(ring, 0);
-
- /* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile.
- * MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT
- * the value is read from byte offset 60 in the given bo
- */
- OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3);
- OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
- OUT_RING(ring, 1);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 4);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
- OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches);
- OUT_RING(ring, 0);
- OUT_RING(ring, 0);
- return true;
+ /* using 4x MSAA allows clearing ~2x faster
+ * then we can use higher bpp clearing to clear lower bpp
+ * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8)
+    * note: it's possible to clear with 32_32_32_32 format but it's not faster
+ * note: fast clear doesn't work with sysmem rendering
+ * (sysmem rendering is disabled when clear is used)
+ *
+ * we only have 16-bit / 32-bit color formats
+ * and 16-bit / 32-bit depth formats
+ * so there are only a few possible combinations
+ *
+ * if the bpp of the color/depth doesn't match
+ * we clear with depth/color individually
+ */
+ struct fd2_context *fd2_ctx = fd2_context(ctx);
+ struct fd_batch *batch = ctx->batch;
+ struct fd_ringbuffer *ring = batch->draw;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ uint32_t color_clear = 0, depth_clear = 0;
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
+ int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */
+ int color_size = -1;
+
+ /* TODO: need to test performance on a22x */
+ if (!is_a20x(ctx->screen))
+ return false;
+
+ if (buffers & PIPE_CLEAR_COLOR)
+ color_size = util_format_get_blocksizebits(format) == 32;
+
+ if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
+ /* no fast clear when clearing only one component of depth+stencil buffer */
+ if (!(buffers & PIPE_CLEAR_DEPTH))
+ return false;
+
+ if ((pfb->zsbuf->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
+ pfb->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
+ !(buffers & PIPE_CLEAR_STENCIL))
+ return false;
+
+ depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8;
+ }
+
+ assert(color_size >= 0 || depth_size >= 0);
+
+ if (color_size == 0) {
+ color_clear = pack_rgba(format, color->f);
+ color_clear = (color_clear << 16) | (color_clear & 0xffff);
+ } else if (color_size == 1) {
+ color_clear = pack_rgba(format, color->f);
+ }
+
+ if (depth_size == 0) {
+ depth_clear = (uint32_t)(0xffff * depth);
+ depth_clear |= depth_clear << 16;
+ } else if (depth_size == 1) {
+ depth_clear = (((uint32_t)(0xffffff * depth)) << 8);
+ depth_clear |= (stencil & 0xff);
+ }
+
+ /* disable "window" scissor.. */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
+ OUT_RING(ring, xy2d(0, 0));
+ OUT_RING(ring, xy2d(0x7fff, 0x7fff));
+
+ /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
+ OUT_RING(ring, fui(4096.0));
+ OUT_RING(ring, fui(4096.0));
+ OUT_RING(ring, fui(4096.0));
+ OUT_RING(ring, fui(4096.0));
+
+ clear_state(batch, ring, ~0u, true);
+
+ if (color_size >= 0 && depth_size != color_size)
+ clear_fast(batch, ring, color_clear, color_clear,
+ GMEM_PATCH_FASTCLEAR_COLOR);
+
+ if (depth_size >= 0 && depth_size != color_size)
+ clear_fast(batch, ring, depth_clear, depth_clear,
+ GMEM_PATCH_FASTCLEAR_DEPTH);
+
+ if (depth_size == color_size)
+ clear_fast(batch, ring, color_clear, depth_clear,
+ GMEM_PATCH_FASTCLEAR_COLOR_DEPTH);
+
+ clear_state_restore(ctx, ring);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
+ OUT_RING(ring, 0);
+
+ /* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile.
+    * MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT;
+    * the value is read from byte offset 60 in the given bo
+ */
+ OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3);
+ OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
+ OUT_RING(ring, 1);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 4);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
+ OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ return true;
}
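
/* Aside (editor's sketch, not driver code): the 16-bit packing used above.
 * The clear value is replicated into both halves of a 32-bit word so one
 * 32-bit "pixel" written by the fast clear covers two 16-bit pixels.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   uint32_t rgb565 = 0xf800; /* e.g. opaque red in RGB565 */
   uint32_t color_clear = (rgb565 << 16) | (rgb565 & 0xffff);

   double depth = 0.5;
   uint32_t depth_clear = (uint32_t)(0xffff * depth);
   depth_clear |= depth_clear << 16;

   printf("color_clear=0x%08x depth_clear=0x%08x\n", color_clear, depth_clear);
   return 0;
}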
static bool
fd2_clear(struct fd_context *ctx, unsigned buffers,
- const union pipe_color_union *color, double depth, unsigned stencil)
- assert_dt
+ const union pipe_color_union *color, double depth,
+ unsigned stencil) assert_dt
{
- struct fd_ringbuffer *ring = ctx->batch->draw;
- struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;
-
- if (fd2_clear_fast(ctx, buffers, color, depth, stencil))
- goto dirty;
-
- /* set clear value */
- if (is_a20x(ctx->screen)) {
- if (buffers & PIPE_CLEAR_COLOR) {
- /* C0 used by fragment shader */
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, 0x00000480);
- OUT_RING(ring, color->ui[0]);
- OUT_RING(ring, color->ui[1]);
- OUT_RING(ring, color->ui[2]);
- OUT_RING(ring, color->ui[3]);
- }
-
- if (buffers & PIPE_CLEAR_DEPTH) {
- /* use viewport to set depth value */
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
- OUT_RING(ring, fui(0.0f));
- OUT_RING(ring, fui(depth));
- }
-
- if (buffers & PIPE_CLEAR_STENCIL) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
- OUT_RING(ring, 0xff000000 |
- A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) |
- A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
- OUT_RING(ring, 0xff000000 |
- A2XX_RB_STENCILREFMASK_STENCILREF(stencil) |
- A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
- }
- } else {
- if (buffers & PIPE_CLEAR_COLOR) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
- OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f));
- }
-
- if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
- uint32_t clear_mask, depth_clear;
- switch (fd_pipe2depth(fb->zsbuf->format)) {
- case DEPTHX_24_8:
- clear_mask = ((buffers & PIPE_CLEAR_DEPTH) ? 0xe : 0) |
- ((buffers & PIPE_CLEAR_STENCIL) ? 0x1 : 0);
- depth_clear = (((uint32_t)(0xffffff * depth)) << 8) |
- (stencil & 0xff);
- break;
- case DEPTHX_16:
- clear_mask = 0xf;
- depth_clear = (uint32_t)(0xffffffff * depth);
- break;
- default:
- unreachable("invalid depth");
- break;
- }
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
- OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
- A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
- OUT_RING(ring, depth_clear);
- }
- }
-
- /* scissor state */
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
- OUT_RING(ring, xy2d(0, 0));
- OUT_RING(ring, xy2d(fb->width, fb->height));
-
- /* viewport state */
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
- OUT_RING(ring, fui((float) fb->width / 2.0));
- OUT_RING(ring, fui((float) fb->width / 2.0));
- OUT_RING(ring, fui((float) fb->height / 2.0));
- OUT_RING(ring, fui((float) fb->height / 2.0));
-
- /* common state */
- clear_state(ctx->batch, ring, buffers, false);
-
- fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
- DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
-
- clear_state_restore(ctx, ring);
+ struct fd_ringbuffer *ring = ctx->batch->draw;
+ struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;
+
+ if (fd2_clear_fast(ctx, buffers, color, depth, stencil))
+ goto dirty;
+
+ /* set clear value */
+ if (is_a20x(ctx->screen)) {
+ if (buffers & PIPE_CLEAR_COLOR) {
+ /* C0 used by fragment shader */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, 0x00000480);
+ OUT_RING(ring, color->ui[0]);
+ OUT_RING(ring, color->ui[1]);
+ OUT_RING(ring, color->ui[2]);
+ OUT_RING(ring, color->ui[3]);
+ }
+
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ /* use viewport to set depth value */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(depth));
+ }
+
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
+ OUT_RING(ring, 0xff000000 |
+ A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) |
+ A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, 0xff000000 |
+ A2XX_RB_STENCILREFMASK_STENCILREF(stencil) |
+ A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+ }
+ } else {
+ if (buffers & PIPE_CLEAR_COLOR) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
+ OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f));
+ }
+
+ if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
+ uint32_t clear_mask, depth_clear;
+ switch (fd_pipe2depth(fb->zsbuf->format)) {
+ case DEPTHX_24_8:
+ clear_mask = ((buffers & PIPE_CLEAR_DEPTH) ? 0xe : 0) |
+ ((buffers & PIPE_CLEAR_STENCIL) ? 0x1 : 0);
+ depth_clear =
+ (((uint32_t)(0xffffff * depth)) << 8) | (stencil & 0xff);
+ break;
+ case DEPTHX_16:
+ clear_mask = 0xf;
+ depth_clear = (uint32_t)(0xffffffff * depth);
+ break;
+ default:
+ unreachable("invalid depth");
+ break;
+ }
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
+ OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
+ A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
+ OUT_RING(ring, depth_clear);
+ }
+ }
+
+ /* scissor state */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
+ OUT_RING(ring, xy2d(0, 0));
+ OUT_RING(ring, xy2d(fb->width, fb->height));
+
+ /* viewport state */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
+ OUT_RING(ring, fui((float)fb->width / 2.0));
+ OUT_RING(ring, fui((float)fb->width / 2.0));
+ OUT_RING(ring, fui((float)fb->height / 2.0));
+ OUT_RING(ring, fui((float)fb->height / 2.0));
+
+ /* common state */
+ clear_state(ctx->batch, ring, buffers, false);
+
+ fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
+
+ clear_state_restore(ctx, ring);
dirty:
- ctx->dirty |= FD_DIRTY_ZSA |
- FD_DIRTY_VIEWPORT |
- FD_DIRTY_RASTERIZER |
- FD_DIRTY_SAMPLE_MASK |
- FD_DIRTY_PROG |
- FD_DIRTY_CONST |
- FD_DIRTY_BLEND |
- FD_DIRTY_FRAMEBUFFER |
- FD_DIRTY_SCISSOR;
-
- ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG;
- ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;
-
- return true;
+ ctx->dirty |= FD_DIRTY_ZSA | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER |
+ FD_DIRTY_SAMPLE_MASK | FD_DIRTY_PROG | FD_DIRTY_CONST |
+ FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
+
+ ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG;
+ ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |=
+ FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;
+
+ return true;
}
void
-fd2_draw_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd2_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->draw_vbo = fd2_draw_vbo;
- ctx->clear = fd2_clear;
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->draw_vbo = fd2_draw_vbo;
+ ctx->clear = fd2_clear;
}
void fd2_draw_init(struct pipe_context *pctx);
enum {
- GMEM_PATCH_FASTCLEAR_COLOR,
- GMEM_PATCH_FASTCLEAR_DEPTH,
- GMEM_PATCH_FASTCLEAR_COLOR_DEPTH,
- GMEM_PATCH_RESTORE_INFO,
+ GMEM_PATCH_FASTCLEAR_COLOR,
+ GMEM_PATCH_FASTCLEAR_DEPTH,
+ GMEM_PATCH_FASTCLEAR_COLOR_DEPTH,
+ GMEM_PATCH_RESTORE_INFO,
};
#endif /* FD2_DRAW_H_ */
*/
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
#include "util/u_helpers.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "freedreno_resource.h"
-#include "fd2_emit.h"
#include "fd2_blend.h"
#include "fd2_context.h"
+#include "fd2_emit.h"
#include "fd2_program.h"
#include "fd2_rasterizer.h"
#include "fd2_texture.h"
static void
emit_constants(struct fd_ringbuffer *ring, uint32_t base,
- struct fd_constbuf_stateobj *constbuf,
- struct fd2_shader_stateobj *shader)
+ struct fd_constbuf_stateobj *constbuf,
+ struct fd2_shader_stateobj *shader)
{
- uint32_t enabled_mask = constbuf->enabled_mask;
- uint32_t start_base = base;
- unsigned i;
-
- /* emit user constants: */
- while (enabled_mask) {
- unsigned index = ffs(enabled_mask) - 1;
- struct pipe_constant_buffer *cb = &constbuf->cb[index];
- unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
-
- // I expect that size should be a multiple of vec4's:
- assert(size == align(size, 4));
-
- /* hmm, sometimes we still seem to end up with consts bound,
- * even if shader isn't using them, which ends up overwriting
- * const reg's used for immediates.. this is a hack to work
- * around that:
- */
- if (shader && ((base - start_base) >= (shader->first_immediate * 4)))
- break;
-
- const uint32_t *dwords;
-
- if (cb->user_buffer) {
- dwords = cb->user_buffer;
- } else {
- struct fd_resource *rsc = fd_resource(cb->buffer);
- dwords = fd_bo_map(rsc->bo);
- }
-
- dwords = (uint32_t *)(((uint8_t *)dwords) + cb->buffer_offset);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, size + 1);
- OUT_RING(ring, base);
- for (i = 0; i < size; i++)
- OUT_RING(ring, *(dwords++));
-
- base += size;
- enabled_mask &= ~(1 << index);
- }
-
- /* emit shader immediates: */
- if (shader) {
- for (i = 0; i < shader->num_immediates; i++) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, start_base + (4 * (shader->first_immediate + i)));
- OUT_RING(ring, shader->immediates[i].val[0]);
- OUT_RING(ring, shader->immediates[i].val[1]);
- OUT_RING(ring, shader->immediates[i].val[2]);
- OUT_RING(ring, shader->immediates[i].val[3]);
- base += 4;
- }
- }
+ uint32_t enabled_mask = constbuf->enabled_mask;
+ uint32_t start_base = base;
+ unsigned i;
+
+ /* emit user constants: */
+ while (enabled_mask) {
+ unsigned index = ffs(enabled_mask) - 1;
+ struct pipe_constant_buffer *cb = &constbuf->cb[index];
+ unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+ // I expect that size should be a multiple of vec4's:
+ assert(size == align(size, 4));
+
+ /* hmm, sometimes we still seem to end up with consts bound,
+ * even if shader isn't using them, which ends up overwriting
+ * const reg's used for immediates.. this is a hack to work
+ * around that:
+ */
+ if (shader && ((base - start_base) >= (shader->first_immediate * 4)))
+ break;
+
+ const uint32_t *dwords;
+
+ if (cb->user_buffer) {
+ dwords = cb->user_buffer;
+ } else {
+ struct fd_resource *rsc = fd_resource(cb->buffer);
+ dwords = fd_bo_map(rsc->bo);
+ }
+
+ dwords = (uint32_t *)(((uint8_t *)dwords) + cb->buffer_offset);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, size + 1);
+ OUT_RING(ring, base);
+ for (i = 0; i < size; i++)
+ OUT_RING(ring, *(dwords++));
+
+ base += size;
+ enabled_mask &= ~(1 << index);
+ }
+
+ /* emit shader immediates: */
+ if (shader) {
+ for (i = 0; i < shader->num_immediates; i++) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, start_base + (4 * (shader->first_immediate + i)));
+ OUT_RING(ring, shader->immediates[i].val[0]);
+ OUT_RING(ring, shader->immediates[i].val[1]);
+ OUT_RING(ring, shader->immediates[i].val[2]);
+ OUT_RING(ring, shader->immediates[i].val[3]);
+ base += 4;
+ }
+ }
}
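
/* Aside (editor's sketch, not driver code): the enabled_mask walk above in
 * isolation. ffs() returns the 1-based index of the lowest set bit, so the
 * loop visits only the bound constant buffers, clearing each bit as it goes.
 */
#include <stdio.h>
#include <strings.h>

int
main(void)
{
   unsigned mask = 0x15; /* constbufs 0, 2 and 4 bound */

   while (mask) {
      unsigned index = ffs(mask) - 1;
      printf("emit constbuf %u\n", index);
      mask &= ~(1u << index);
   }
   return 0;
}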
typedef uint32_t texmask;
static texmask
emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
- struct fd_texture_stateobj *tex, unsigned samp_id, texmask emitted)
+ struct fd_texture_stateobj *tex, unsigned samp_id, texmask emitted)
{
- unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
- static const struct fd2_sampler_stateobj dummy_sampler = {};
- static const struct fd2_pipe_sampler_view dummy_view = {};
- const struct fd2_sampler_stateobj *sampler;
- const struct fd2_pipe_sampler_view *view;
- struct fd_resource *rsc;
-
- if (emitted & (1 << const_idx))
- return 0;
-
- sampler = tex->samplers[samp_id] ?
- fd2_sampler_stateobj(tex->samplers[samp_id]) :
- &dummy_sampler;
- view = tex->textures[samp_id] ?
- fd2_pipe_sampler_view(tex->textures[samp_id]) :
- &dummy_view;
-
- rsc = view->base.texture ? fd_resource(view->base.texture) : NULL;
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 7);
- OUT_RING(ring, 0x00010000 + (0x6 * const_idx));
-
- OUT_RING(ring, sampler->tex0 | view->tex0);
- if (rsc)
- OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 0, 0), view->tex1, 0);
- else
- OUT_RING(ring, 0);
-
- OUT_RING(ring, view->tex2);
- OUT_RING(ring, sampler->tex3 | view->tex3);
- OUT_RING(ring, sampler->tex4 | view->tex4);
-
- if (rsc && rsc->b.b.last_level)
- OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 1, 0), view->tex5, 0);
- else
- OUT_RING(ring, view->tex5);
-
- return (1 << const_idx);
+ unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
+ static const struct fd2_sampler_stateobj dummy_sampler = {};
+ static const struct fd2_pipe_sampler_view dummy_view = {};
+ const struct fd2_sampler_stateobj *sampler;
+ const struct fd2_pipe_sampler_view *view;
+ struct fd_resource *rsc;
+
+ if (emitted & (1 << const_idx))
+ return 0;
+
+ sampler = tex->samplers[samp_id]
+ ? fd2_sampler_stateobj(tex->samplers[samp_id])
+ : &dummy_sampler;
+ view = tex->textures[samp_id] ? fd2_pipe_sampler_view(tex->textures[samp_id])
+ : &dummy_view;
+
+ rsc = view->base.texture ? fd_resource(view->base.texture) : NULL;
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 7);
+ OUT_RING(ring, 0x00010000 + (0x6 * const_idx));
+
+ OUT_RING(ring, sampler->tex0 | view->tex0);
+ if (rsc)
+ OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 0, 0), view->tex1, 0);
+ else
+ OUT_RING(ring, 0);
+
+ OUT_RING(ring, view->tex2);
+ OUT_RING(ring, sampler->tex3 | view->tex3);
+ OUT_RING(ring, sampler->tex4 | view->tex4);
+
+ if (rsc && rsc->b.b.last_level)
+ OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 1, 0), view->tex5, 0);
+ else
+ OUT_RING(ring, view->tex5);
+
+ return (1 << const_idx);
}
static void
emit_textures(struct fd_ringbuffer *ring, struct fd_context *ctx)
{
- struct fd_texture_stateobj *fragtex = &ctx->tex[PIPE_SHADER_FRAGMENT];
- struct fd_texture_stateobj *verttex = &ctx->tex[PIPE_SHADER_VERTEX];
- texmask emitted = 0;
- unsigned i;
-
- for (i = 0; i < verttex->num_samplers; i++)
- if (verttex->samplers[i])
- emitted |= emit_texture(ring, ctx, verttex, i, emitted);
-
- for (i = 0; i < fragtex->num_samplers; i++)
- if (fragtex->samplers[i])
- emitted |= emit_texture(ring, ctx, fragtex, i, emitted);
+ struct fd_texture_stateobj *fragtex = &ctx->tex[PIPE_SHADER_FRAGMENT];
+ struct fd_texture_stateobj *verttex = &ctx->tex[PIPE_SHADER_VERTEX];
+ texmask emitted = 0;
+ unsigned i;
+
+ for (i = 0; i < verttex->num_samplers; i++)
+ if (verttex->samplers[i])
+ emitted |= emit_texture(ring, ctx, verttex, i, emitted);
+
+ for (i = 0; i < fragtex->num_samplers; i++)
+ if (fragtex->samplers[i])
+ emitted |= emit_texture(ring, ctx, fragtex, i, emitted);
}
void
fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
- struct fd2_vertex_buf *vbufs, uint32_t n)
+ struct fd2_vertex_buf *vbufs, uint32_t n)
{
- unsigned i;
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n));
- OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
- for (i = 0; i < n; i++) {
- struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0);
- OUT_RING (ring, vbufs[i].size);
- }
+ unsigned i;
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n));
+ OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
+ for (i = 0; i < n; i++) {
+ struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
+ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0);
+ OUT_RING(ring, vbufs[i].size);
+ }
}
void
-fd2_emit_state_binning(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
+fd2_emit_state_binning(struct fd_context *ctx,
+ const enum fd_dirty_3d_state dirty)
{
- struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
- struct fd_ringbuffer *ring = ctx->batch->binning;
-
- /* subset of fd2_emit_state needed for hw binning on a20x */
-
- if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE))
- fd2_program_emit(ctx, ring, &ctx->prog);
-
- if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) {
- emit_constants(ring, VS_CONST_BASE * 4,
- &ctx->constbuf[PIPE_SHADER_VERTEX],
- (dirty & FD_DIRTY_PROG) ? ctx->prog.vs : NULL);
- }
-
- if (dirty & FD_DIRTY_VIEWPORT) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 9);
- OUT_RING(ring, 0x00000184);
- OUT_RING(ring, fui(ctx->viewport.translate[0]));
- OUT_RING(ring, fui(ctx->viewport.translate[1]));
- OUT_RING(ring, fui(ctx->viewport.translate[2]));
- OUT_RING(ring, fui(0.0f));
- OUT_RING(ring, fui(ctx->viewport.scale[0]));
- OUT_RING(ring, fui(ctx->viewport.scale[1]));
- OUT_RING(ring, fui(ctx->viewport.scale[2]));
- OUT_RING(ring, fui(0.0f));
- }
-
- /* not sure why this is needed */
- if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
- OUT_RING(ring, blend->rb_blendcontrol);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
- OUT_RING(ring, blend->rb_colormask);
- }
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
- OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE);
+ struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
+ struct fd_ringbuffer *ring = ctx->batch->binning;
+
+ /* subset of fd2_emit_state needed for hw binning on a20x */
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE))
+ fd2_program_emit(ctx, ring, &ctx->prog);
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) {
+ emit_constants(ring, VS_CONST_BASE * 4,
+ &ctx->constbuf[PIPE_SHADER_VERTEX],
+ (dirty & FD_DIRTY_PROG) ? ctx->prog.vs : NULL);
+ }
+
+ if (dirty & FD_DIRTY_VIEWPORT) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 9);
+ OUT_RING(ring, 0x00000184);
+ OUT_RING(ring, fui(ctx->viewport.translate[0]));
+ OUT_RING(ring, fui(ctx->viewport.translate[1]));
+ OUT_RING(ring, fui(ctx->viewport.translate[2]));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(ctx->viewport.scale[0]));
+ OUT_RING(ring, fui(ctx->viewport.scale[1]));
+ OUT_RING(ring, fui(ctx->viewport.scale[2]));
+ OUT_RING(ring, fui(0.0f));
+ }
+
+ /* not sure why this is needed */
+ if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
+ OUT_RING(ring, blend->rb_blendcontrol);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
+ OUT_RING(ring, blend->rb_colormask);
+ }
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
+ OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE);
}
void
fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
{
- struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
- struct fd2_zsa_stateobj *zsa = fd2_zsa_stateobj(ctx->zsa);
- struct fd2_shader_stateobj *fs = ctx->prog.fs;
- struct fd_ringbuffer *ring = ctx->batch->draw;
-
- /* NOTE: we probably want to eventually refactor this so each state
- * object handles emitting it's own state.. although the mapping of
- * state to registers is not always orthogonal, sometimes a single
- * register contains bitfields coming from multiple state objects,
- * so not sure the best way to deal with that yet.
- */
-
- if (dirty & FD_DIRTY_SAMPLE_MASK) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
- OUT_RING(ring, ctx->sample_mask);
- }
-
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF | FD_DIRTY_PROG)) {
- struct pipe_stencil_ref *sr = &ctx->stencil_ref;
- uint32_t val = zsa->rb_depthcontrol;
-
- if (fs->has_kill)
- val &= ~A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
- OUT_RING(ring, val);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 4);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
- OUT_RING(ring, zsa->rb_stencilrefmask_bf |
- A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[1]));
- OUT_RING(ring, zsa->rb_stencilrefmask |
- A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
- OUT_RING(ring, zsa->rb_alpha_ref);
- }
-
- if (ctx->rasterizer && dirty & FD_DIRTY_RASTERIZER) {
- struct fd2_rasterizer_stateobj *rasterizer =
- fd2_rasterizer_stateobj(ctx->rasterizer);
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
- OUT_RING(ring, rasterizer->pa_cl_clip_cntl);
- OUT_RING(ring, rasterizer->pa_su_sc_mode_cntl |
- A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POINT_SIZE));
- OUT_RING(ring, rasterizer->pa_su_point_size);
- OUT_RING(ring, rasterizer->pa_su_point_minmax);
- OUT_RING(ring, rasterizer->pa_su_line_cntl);
- OUT_RING(ring, rasterizer->pa_sc_line_stipple);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 6);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_VTX_CNTL));
- OUT_RING(ring, rasterizer->pa_su_vtx_cntl);
- OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_CLIP_ADJ */
- OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_DISC_ADJ */
- OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_CLIP_ADJ */
- OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_DISC_ADJ */
-
- if (rasterizer->base.offset_tri) {
- /* TODO: why multiply scale by 2 ? without it deqp test fails
- * deqp/piglit tests aren't very precise
- */
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POLY_OFFSET_FRONT_SCALE));
- OUT_RING(ring, fui(rasterizer->base.offset_scale * 2.0f)); /* FRONT_SCALE */
- OUT_RING(ring, fui(rasterizer->base.offset_units)); /* FRONT_OFFSET */
- OUT_RING(ring, fui(rasterizer->base.offset_scale * 2.0f)); /* BACK_SCALE */
- OUT_RING(ring, fui(rasterizer->base.offset_units)); /* BACK_OFFSET */
- }
- }
-
- /* NOTE: scissor enabled bit is part of rasterizer state: */
- if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
- struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
- OUT_RING(ring, xy2d(scissor->minx, /* PA_SC_WINDOW_SCISSOR_TL */
- scissor->miny));
- OUT_RING(ring, xy2d(scissor->maxx, /* PA_SC_WINDOW_SCISSOR_BR */
- scissor->maxy));
-
- ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
- ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
- ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
- ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
- }
-
- if (dirty & FD_DIRTY_VIEWPORT) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 7);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
- OUT_RING(ring, fui(ctx->viewport.scale[0])); /* PA_CL_VPORT_XSCALE */
- OUT_RING(ring, fui(ctx->viewport.translate[0])); /* PA_CL_VPORT_XOFFSET */
- OUT_RING(ring, fui(ctx->viewport.scale[1])); /* PA_CL_VPORT_YSCALE */
- OUT_RING(ring, fui(ctx->viewport.translate[1])); /* PA_CL_VPORT_YOFFSET */
- OUT_RING(ring, fui(ctx->viewport.scale[2])); /* PA_CL_VPORT_ZSCALE */
- OUT_RING(ring, fui(ctx->viewport.translate[2])); /* PA_CL_VPORT_ZOFFSET */
-
- /* set viewport in C65/C66, for a20x hw binning and fragcoord.z */
- OUT_PKT3(ring, CP_SET_CONSTANT, 9);
- OUT_RING(ring, 0x00000184);
-
- OUT_RING(ring, fui(ctx->viewport.translate[0]));
- OUT_RING(ring, fui(ctx->viewport.translate[1]));
- OUT_RING(ring, fui(ctx->viewport.translate[2]));
- OUT_RING(ring, fui(0.0f));
-
- OUT_RING(ring, fui(ctx->viewport.scale[0]));
- OUT_RING(ring, fui(ctx->viewport.scale[1]));
- OUT_RING(ring, fui(ctx->viewport.scale[2]));
- OUT_RING(ring, fui(0.0f));
- }
-
- if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE))
- fd2_program_emit(ctx, ring, &ctx->prog);
-
- if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) {
- emit_constants(ring, VS_CONST_BASE * 4,
- &ctx->constbuf[PIPE_SHADER_VERTEX],
- (dirty & FD_DIRTY_PROG) ? ctx->prog.vs : NULL);
- emit_constants(ring, PS_CONST_BASE * 4,
- &ctx->constbuf[PIPE_SHADER_FRAGMENT],
- (dirty & FD_DIRTY_PROG) ? ctx->prog.fs : NULL);
- }
-
- if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
- OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol);
- }
-
- if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
- OUT_RING(ring, blend->rb_blendcontrol);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
- OUT_RING(ring, blend->rb_colormask);
- }
-
- if (dirty & FD_DIRTY_BLEND_COLOR) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
- OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[0]));
- OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[1]));
- OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[2]));
- OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[3]));
- }
-
- if (dirty & (FD_DIRTY_TEX | FD_DIRTY_PROG))
- emit_textures(ring, ctx);
+ struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
+ struct fd2_zsa_stateobj *zsa = fd2_zsa_stateobj(ctx->zsa);
+ struct fd2_shader_stateobj *fs = ctx->prog.fs;
+ struct fd_ringbuffer *ring = ctx->batch->draw;
+
+ /* NOTE: we probably want to eventually refactor this so each state
+ * object handles emitting its own state.. although the mapping of
+ * state to registers is not always orthogonal, sometimes a single
+ * register contains bitfields coming from multiple state objects,
+ * so not sure the best way to deal with that yet.
+ */
+
+ if (dirty & FD_DIRTY_SAMPLE_MASK) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
+ OUT_RING(ring, ctx->sample_mask);
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF | FD_DIRTY_PROG)) {
+ struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+ uint32_t val = zsa->rb_depthcontrol;
+
+ if (fs->has_kill)
+ val &= ~A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
+ OUT_RING(ring, val);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 4);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
+ OUT_RING(ring, zsa->rb_stencilrefmask_bf |
+ A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[1]));
+ OUT_RING(ring, zsa->rb_stencilrefmask |
+ A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+ OUT_RING(ring, zsa->rb_alpha_ref);
+ }
+
+ if (ctx->rasterizer && dirty & FD_DIRTY_RASTERIZER) {
+ struct fd2_rasterizer_stateobj *rasterizer =
+ fd2_rasterizer_stateobj(ctx->rasterizer);
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
+ OUT_RING(ring, rasterizer->pa_cl_clip_cntl);
+ OUT_RING(ring, rasterizer->pa_su_sc_mode_cntl |
+ A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POINT_SIZE));
+ OUT_RING(ring, rasterizer->pa_su_point_size);
+ OUT_RING(ring, rasterizer->pa_su_point_minmax);
+ OUT_RING(ring, rasterizer->pa_su_line_cntl);
+ OUT_RING(ring, rasterizer->pa_sc_line_stipple);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 6);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_VTX_CNTL));
+ OUT_RING(ring, rasterizer->pa_su_vtx_cntl);
+ OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_CLIP_ADJ */
+ OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_DISC_ADJ */
+ OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_CLIP_ADJ */
+ OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_DISC_ADJ */
+
+ if (rasterizer->base.offset_tri) {
+ /* TODO: why multiply scale by 2 ? without it deqp test fails
+ * deqp/piglit tests aren't very precise
+ */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POLY_OFFSET_FRONT_SCALE));
+ OUT_RING(ring,
+ fui(rasterizer->base.offset_scale * 2.0f)); /* FRONT_SCALE */
+ OUT_RING(ring, fui(rasterizer->base.offset_units)); /* FRONT_OFFSET */
+ OUT_RING(ring,
+ fui(rasterizer->base.offset_scale * 2.0f)); /* BACK_SCALE */
+ OUT_RING(ring, fui(rasterizer->base.offset_units)); /* BACK_OFFSET */
+ }
+ }
+
+ /* NOTE: scissor enabled bit is part of rasterizer state: */
+ if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
+ struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
+ OUT_RING(ring, xy2d(scissor->minx, /* PA_SC_WINDOW_SCISSOR_TL */
+ scissor->miny));
+ OUT_RING(ring, xy2d(scissor->maxx, /* PA_SC_WINDOW_SCISSOR_BR */
+ scissor->maxy));
+
+ ctx->batch->max_scissor.minx =
+ MIN2(ctx->batch->max_scissor.minx, scissor->minx);
+ ctx->batch->max_scissor.miny =
+ MIN2(ctx->batch->max_scissor.miny, scissor->miny);
+ ctx->batch->max_scissor.maxx =
+ MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
+ ctx->batch->max_scissor.maxy =
+ MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
+ }
+
+ if (dirty & FD_DIRTY_VIEWPORT) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 7);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
+ OUT_RING(ring, fui(ctx->viewport.scale[0])); /* PA_CL_VPORT_XSCALE */
+ OUT_RING(ring, fui(ctx->viewport.translate[0])); /* PA_CL_VPORT_XOFFSET */
+ OUT_RING(ring, fui(ctx->viewport.scale[1])); /* PA_CL_VPORT_YSCALE */
+ OUT_RING(ring, fui(ctx->viewport.translate[1])); /* PA_CL_VPORT_YOFFSET */
+ OUT_RING(ring, fui(ctx->viewport.scale[2])); /* PA_CL_VPORT_ZSCALE */
+ OUT_RING(ring, fui(ctx->viewport.translate[2])); /* PA_CL_VPORT_ZOFFSET */
+
+ /* set viewport in C65/C66, for a20x hw binning and fragcoord.z */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 9);
+ OUT_RING(ring, 0x00000184);
+
+ OUT_RING(ring, fui(ctx->viewport.translate[0]));
+ OUT_RING(ring, fui(ctx->viewport.translate[1]));
+ OUT_RING(ring, fui(ctx->viewport.translate[2]));
+ OUT_RING(ring, fui(0.0f));
+
+ OUT_RING(ring, fui(ctx->viewport.scale[0]));
+ OUT_RING(ring, fui(ctx->viewport.scale[1]));
+ OUT_RING(ring, fui(ctx->viewport.scale[2]));
+ OUT_RING(ring, fui(0.0f));
+ }
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE))
+ fd2_program_emit(ctx, ring, &ctx->prog);
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) {
+ emit_constants(ring, VS_CONST_BASE * 4,
+ &ctx->constbuf[PIPE_SHADER_VERTEX],
+ (dirty & FD_DIRTY_PROG) ? ctx->prog.vs : NULL);
+ emit_constants(ring, PS_CONST_BASE * 4,
+ &ctx->constbuf[PIPE_SHADER_FRAGMENT],
+ (dirty & FD_DIRTY_PROG) ? ctx->prog.fs : NULL);
+ }
+
+ if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
+ OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol);
+ }
+
+ if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
+ OUT_RING(ring, blend->rb_blendcontrol);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
+ OUT_RING(ring, blend->rb_colormask);
+ }
+
+ if (dirty & FD_DIRTY_BLEND_COLOR) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
+ OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[0]));
+ OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[1]));
+ OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[2]));
+ OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[3]));
+ }
+
+ if (dirty & (FD_DIRTY_TEX | FD_DIRTY_PROG))
+ emit_textures(ring, ctx);
}
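
As the NOTE at the top of fd2_emit_state() says, some registers mix bitfields from several state objects: RB_COLORCONTROL, for example, combines zsa and blend bits, so the dirty test must OR together the flags of every contributing state. A compact sketch of that pattern, with hypothetical flag names standing in for FD_DIRTY_*:

#include <stdbool.h>
#include <stdint.h>

enum { DIRTY_BLEND = 1u << 0, DIRTY_ZSA = 1u << 1 };

/* RB_COLORCONTROL merges zsa and blend bitfields, so either state
 * going dirty must trigger a re-emit of the combined value */
static bool
colorcontrol_needs_emit(uint32_t dirty)
{
   return (dirty & (DIRTY_BLEND | DIRTY_ZSA)) != 0;
}

int
main(void)
{
   return colorcontrol_needs_emit(DIRTY_ZSA) ? 0 : 1;
}
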
/* emit per-context initialization:
 */
void
fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
- if (is_a20x(ctx->screen)) {
- OUT_PKT0(ring, REG_A2XX_RB_BC_CONTROL, 1);
- OUT_RING(ring,
- A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(3) |
- A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP |
- A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE |
- A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) |
- A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3));
-
- /* not sure why this is required */
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_VIZ_QUERY));
- OUT_RING(ring, A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(16));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
- OUT_RING(ring, 0x00000002);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_OUT_DEALLOC_CNTL));
- OUT_RING(ring, 0x00000002);
- } else {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
- OUT_RING(ring, 0x0000003b);
- }
-
- /* enable perfcntrs */
- OUT_PKT0(ring, REG_A2XX_CP_PERFMON_CNTL, 1);
- OUT_RING(ring, COND(FD_DBG(PERFC), 1));
-
- /* note: perfcntrs don't work without the PM_OVERRIDE bit */
- OUT_PKT0(ring, REG_A2XX_RBBM_PM_OVERRIDE1, 2);
- OUT_RING(ring, 0xffffffff);
- OUT_RING(ring, 0x00000fff);
-
- OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
- OUT_RING(ring, 0x00000002);
-
- OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
- OUT_RING(ring, 0x00007fff);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_SQ_VS_CONST));
- OUT_RING(ring, A2XX_SQ_VS_CONST_BASE(VS_CONST_BASE) |
- A2XX_SQ_VS_CONST_SIZE(0x100));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_SQ_PS_CONST));
- OUT_RING(ring, A2XX_SQ_PS_CONST_BASE(PS_CONST_BASE) |
- A2XX_SQ_PS_CONST_SIZE(0xe0));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
- OUT_RING(ring, 0xffffffff); /* VGT_MAX_VTX_INDX */
- OUT_RING(ring, 0x00000000); /* VGT_MIN_VTX_INDX */
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
- OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_SQ_INTERPOLATOR_CNTL));
- OUT_RING(ring, 0xffffffff);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_LINE_CNTL));
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
- OUT_RING(ring, 0x00000000);
-
- // XXX we change this dynamically for draw/clear.. vs gmem<->mem..
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
- OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS));
- OUT_RING(ring, 0x88888888);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_DEST_MASK));
- OUT_RING(ring, 0xffffffff);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_INFO));
- OUT_RING(ring, A2XX_RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) |
- A2XX_RB_COPY_DEST_INFO_WRITE_RED |
- A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
- A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
- A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_SQ_WRAPPING_0));
- OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_0 */
- OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_1 */
-
- OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
- OUT_RING(ring, 0x000005d0);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x5f601000);
- OUT_RING(ring, 0x00000001);
-
- OUT_PKT0(ring, REG_A2XX_SQ_INST_STORE_MANAGMENT, 1);
- OUT_RING(ring, 0x00000180);
-
- OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
- OUT_RING(ring, 0x00000300);
-
- OUT_PKT3(ring, CP_SET_SHADER_BASES, 1);
- OUT_RING(ring, 0x80000180);
-
- /* not sure what this form of CP_SET_CONSTANT is.. */
- OUT_PKT3(ring, CP_SET_CONSTANT, 13);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x469c4000);
- OUT_RING(ring, 0x3f800000);
- OUT_RING(ring, 0x3f000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x40000000);
- OUT_RING(ring, 0x3f400000);
- OUT_RING(ring, 0x3ec00000);
- OUT_RING(ring, 0x3e800000);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
- OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
- A2XX_RB_COLOR_MASK_WRITE_GREEN |
- A2XX_RB_COLOR_MASK_WRITE_BLUE |
- A2XX_RB_COLOR_MASK_WRITE_ALPHA);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
- OUT_RING(ring, 0x00000000); /* RB_BLEND_RED */
- OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */
- OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */
- OUT_RING(ring, 0x000000ff); /* RB_BLEND_ALPHA */
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
- OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
- A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
+ if (is_a20x(ctx->screen)) {
+ OUT_PKT0(ring, REG_A2XX_RB_BC_CONTROL, 1);
+ OUT_RING(ring, A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(3) |
+ A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP |
+ A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE |
+ A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) |
+ A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3));
+
+ /* not sure why this is required */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_VIZ_QUERY));
+ OUT_RING(ring, A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(16));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0x00000002);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_OUT_DEALLOC_CNTL));
+ OUT_RING(ring, 0x00000002);
+ } else {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0x0000003b);
+ }
+
+ /* enable perfcntrs */
+ OUT_PKT0(ring, REG_A2XX_CP_PERFMON_CNTL, 1);
+ OUT_RING(ring, COND(FD_DBG(PERFC), 1));
+
+ /* note: perfcntrs don't work without the PM_OVERRIDE bit */
+ OUT_PKT0(ring, REG_A2XX_RBBM_PM_OVERRIDE1, 2);
+ OUT_RING(ring, 0xffffffff);
+ OUT_RING(ring, 0x00000fff);
+
+ OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
+ OUT_RING(ring, 0x00000002);
+
+ OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+ OUT_RING(ring, 0x00007fff);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_SQ_VS_CONST));
+ OUT_RING(ring, A2XX_SQ_VS_CONST_BASE(VS_CONST_BASE) |
+ A2XX_SQ_VS_CONST_SIZE(0x100));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_SQ_PS_CONST));
+ OUT_RING(ring,
+ A2XX_SQ_PS_CONST_BASE(PS_CONST_BASE) | A2XX_SQ_PS_CONST_SIZE(0xe0));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
+ OUT_RING(ring, 0xffffffff); /* VGT_MAX_VTX_INDX */
+ OUT_RING(ring, 0x00000000); /* VGT_MIN_VTX_INDX */
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
+ OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_SQ_INTERPOLATOR_CNTL));
+ OUT_RING(ring, 0xffffffff);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_LINE_CNTL));
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
+ OUT_RING(ring, 0x00000000);
+
+ // XXX we change this dynamically for draw/clear.. vs gmem<->mem..
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
+ OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS));
+ OUT_RING(ring, 0x88888888);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_DEST_MASK));
+ OUT_RING(ring, 0xffffffff);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_INFO));
+ OUT_RING(ring, A2XX_RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) |
+ A2XX_RB_COPY_DEST_INFO_WRITE_RED |
+ A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
+ A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
+ A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_SQ_WRAPPING_0));
+ OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_0 */
+ OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_1 */
+
+ OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
+ OUT_RING(ring, 0x000005d0);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x5f601000);
+ OUT_RING(ring, 0x00000001);
+
+ OUT_PKT0(ring, REG_A2XX_SQ_INST_STORE_MANAGMENT, 1);
+ OUT_RING(ring, 0x00000180);
+
+ OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+ OUT_RING(ring, 0x00000300);
+
+ OUT_PKT3(ring, CP_SET_SHADER_BASES, 1);
+ OUT_RING(ring, 0x80000180);
+
+ /* not sure what this form of CP_SET_CONSTANT is.. */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 13);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x469c4000);
+ OUT_RING(ring, 0x3f800000);
+ OUT_RING(ring, 0x3f000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x40000000);
+ OUT_RING(ring, 0x3f400000);
+ OUT_RING(ring, 0x3ec00000);
+ OUT_RING(ring, 0x3e800000);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
+ OUT_RING(ring,
+ A2XX_RB_COLOR_MASK_WRITE_RED | A2XX_RB_COLOR_MASK_WRITE_GREEN |
+ A2XX_RB_COLOR_MASK_WRITE_BLUE | A2XX_RB_COLOR_MASK_WRITE_ALPHA);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
+ OUT_RING(ring, 0x00000000); /* RB_BLEND_RED */
+ OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */
+ OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */
+ OUT_RING(ring, 0x000000ff); /* RB_BLEND_ALPHA */
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
+ OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
+ A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
}
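
The COND() used for the perfcounter enable above is freedreno's conditional-value helper: it yields the value when the condition holds and 0 otherwise, so CP_PERFMON_CNTL is written as 1 only under the PERFC debug flag. A sketch of the same shape:

#include <assert.h>

/* same shape as the driver's COND() helper */
#define COND_SKETCH(cond, val) ((cond) ? (val) : 0)

int
main(void)
{
   assert(COND_SKETCH(1, 1) == 1); /* FD_DBG(PERFC) set: enable */
   assert(COND_SKETCH(0, 1) == 0); /* unset: register written as 0 */
   return 0;
}
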
void
fd2_emit_init_screen(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
- screen->emit_ib = fd2_emit_ib;
+ struct fd_screen *screen = fd_screen(pscreen);
+ screen->emit_ib = fd2_emit_ib;
}
void
struct fd_ringbuffer;
struct fd2_vertex_buf {
- unsigned offset, size;
- struct pipe_resource *prsc;
+ unsigned offset, size;
+ struct pipe_resource *prsc;
};
void fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
- struct fd2_vertex_buf *vbufs, uint32_t n);
-void fd2_emit_state_binning(struct fd_context *ctx, const enum fd_dirty_3d_state dirty) assert_dt;
-void fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty) assert_dt;
+ struct fd2_vertex_buf *vbufs, uint32_t n);
+void fd2_emit_state_binning(struct fd_context *ctx,
+ const enum fd_dirty_3d_state dirty) assert_dt;
+void fd2_emit_state(struct fd_context *ctx,
+ const enum fd_dirty_3d_state dirty) assert_dt;
void fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring);
void fd2_emit_init_screen(struct pipe_screen *pscreen);
static inline void
fd2_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
- __OUT_IB(ring, false, target);
+ __OUT_IB(ring, false, target);
}
#endif /* FD2_EMIT_H */
*/
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "freedreno_draw.h"
-#include "freedreno_state.h"
#include "freedreno_resource.h"
+#include "freedreno_state.h"
-#include "fd2_gmem.h"
+#include "ir2/instr-a2xx.h"
#include "fd2_context.h"
+#include "fd2_draw.h"
#include "fd2_emit.h"
+#include "fd2_gmem.h"
#include "fd2_program.h"
#include "fd2_util.h"
#include "fd2_zsa.h"
-#include "fd2_draw.h"
-#include "ir2/instr-a2xx.h"
-static uint32_t fmt2swap(enum pipe_format format)
+static uint32_t
+fmt2swap(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- case PIPE_FORMAT_B5G6R5_UNORM:
- case PIPE_FORMAT_B5G5R5A1_UNORM:
- case PIPE_FORMAT_B5G5R5X1_UNORM:
- case PIPE_FORMAT_B4G4R4A4_UNORM:
- case PIPE_FORMAT_B4G4R4X4_UNORM:
- case PIPE_FORMAT_B2G3R3_UNORM:
- return 1;
- default:
- return 0;
- }
+ switch (format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
+ case PIPE_FORMAT_B4G4R4A4_UNORM:
+ case PIPE_FORMAT_B4G4R4X4_UNORM:
+ case PIPE_FORMAT_B2G3R3_UNORM:
+ return 1;
+ default:
+ return 0;
+ }
}
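
fmt2swap() returns 1 exactly for the BGR-ordered formats, steering the RB's channel-swap path; RGB-ordered formats fall through to 0. A stand-alone sketch with stand-in enums (the real code uses enum pipe_format):

#include <assert.h>

/* stand-ins for two representative pipe formats */
enum fmt { FMT_R8G8B8A8_UNORM, FMT_B8G8R8A8_UNORM };

static unsigned
fmt2swap_sketch(enum fmt f)
{
   return f == FMT_B8G8R8A8_UNORM ? 1 : 0; /* 1 == swap red/blue */
}

int
main(void)
{
   assert(fmt2swap_sketch(FMT_B8G8R8A8_UNORM) == 1);
   assert(fmt2swap_sketch(FMT_R8G8B8A8_UNORM) == 0);
   return 0;
}
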
static bool
use_hw_binning(struct fd_batch *batch)
{
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
-
- /* we hardcoded a limit of 8 "pipes", we can increase this limit
- * at the cost of a slightly larger command stream
- * however very few cases will need more than 8
- * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?)
- */
- if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes)
- return false;
-
- /* only a20x hw binning is implement
- * a22x is more like a3xx, but perhaps the a20x works? (TODO)
- */
- if (!is_a20x(batch->ctx->screen))
- return false;
-
- return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+
+ /* we hardcoded a limit of 8 "pipes", we can increase this limit
+ * at the cost of a slightly larger command stream
+ * however very few cases will need more than 8
+ * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?)
+ */
+ if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes)
+ return false;
+
+ /* only a20x hw binning is implemented
+ * a22x is more like a3xx, but perhaps the a20x works? (TODO)
+ */
+ if (!is_a20x(batch->ctx->screen))
+ return false;
+
+ return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
}
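
Pulled out of driver context, the decision above reduces to: at most 8 VSC pipes, a20x only, binning globally enabled, and more than two bins so the extra binning pass is worth its cost. A sketch minus the driver types:

#include <stdbool.h>

struct gmem_info {
   unsigned num_vsc_pipes, nbins_x, nbins_y;
};

/* mirrors use_hw_binning() above */
static bool
want_hw_binning(const struct gmem_info *g, bool a20x, bool binning_enabled)
{
   if (g->num_vsc_pipes > 8 || !g->num_vsc_pipes)
      return false; /* hardcoded 8-pipe limit; 0 means an empty batch */
   if (!a20x)
      return false; /* only the a20x path is implemented */
   return binning_enabled && (g->nbins_x * g->nbins_y) > 2;
}

int
main(void)
{
   struct gmem_info g = {.num_vsc_pipes = 4, .nbins_x = 2, .nbins_y = 2};
   return want_hw_binning(&g, true, true) ? 0 : 1;
}
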
/* transfer from gmem to system memory (ie. normal RAM) */
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
- struct pipe_surface *psurf)
+ struct pipe_surface *psurf)
{
- struct fd_ringbuffer *ring = batch->tile_fini;
- struct fd_resource *rsc = fd_resource(psurf->texture);
- uint32_t offset =
- fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
- enum pipe_format format = fd_gmem_restore_format(psurf->format);
- uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
-
- assert((pitch & 31) == 0);
- assert((offset & 0xfff) == 0);
-
- if (!rsc->valid)
- return;
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
- OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
- A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
- OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */
- OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
- OUT_RING(ring, pitch >> 5); /* RB_COPY_DEST_PITCH */
- OUT_RING(ring, /* RB_COPY_DEST_INFO */
- A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(format)) |
- COND(!rsc->layout.tile_mode, A2XX_RB_COPY_DEST_INFO_LINEAR) |
- A2XX_RB_COPY_DEST_INFO_WRITE_RED |
- A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
- A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
- A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
-
- if (!is_a20x(batch->ctx->screen)) {
- OUT_WFI (ring);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
- OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
- OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
- }
-
- fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
- DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
+ struct fd_ringbuffer *ring = batch->tile_fini;
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+ uint32_t offset =
+ fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
+ enum pipe_format format = fd_gmem_restore_format(psurf->format);
+ uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
+
+ assert((pitch & 31) == 0);
+ assert((offset & 0xfff) == 0);
+
+ if (!rsc->valid)
+ return;
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
+ OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
+ A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
+ OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */
+ OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
+ OUT_RING(ring, pitch >> 5); /* RB_COPY_DEST_PITCH */
+ OUT_RING(ring, /* RB_COPY_DEST_INFO */
+ A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(format)) |
+ COND(!rsc->layout.tile_mode, A2XX_RB_COPY_DEST_INFO_LINEAR) |
+ A2XX_RB_COPY_DEST_INFO_WRITE_RED |
+ A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
+ A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
+ A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
+
+ if (!is_a20x(batch->ctx->screen)) {
+ OUT_WFI(ring);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
+ OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
+ OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
+ }
+
+ fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
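
RB_COPY_DEST_PITCH is encoded in 32-pixel units, which is why the function asserts the pitch is 32-aligned and then packs pitch >> 5. Worked check:

#include <assert.h>
#include <stdint.h>

/* pack a pixel pitch into RB_COPY_DEST_PITCH's 32-pixel units */
static uint32_t
copy_dest_pitch(uint32_t pitch_pixels)
{
   assert((pitch_pixels & 31) == 0); /* must be a multiple of 32 */
   return pitch_pixels >> 5;
}

int
main(void)
{
   assert(copy_dest_pitch(1024) == 32); /* 1024 px wide -> field value 32 */
   return 0;
}
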
static void
-prepare_tile_fini_ib(struct fd_batch *batch)
- assert_dt
+prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct fd2_context *fd2_ctx = fd2_context(ctx);
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct fd_ringbuffer *ring;
-
- batch->tile_fini = fd_submit_new_ringbuffer(batch->submit, 0x1000,
- FD_RINGBUFFER_STREAMING);
- ring = batch->tile_fini;
-
- fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
- { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
- }, 1);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
- OUT_RING(ring, 0x00000000); /* PA_SC_WINDOW_OFFSET */
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
- OUT_RING(ring, 0);
-
- if (!is_a20x(ctx->screen)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
- OUT_RING(ring, 0x0000028f);
- }
-
- fd2_program_emit(ctx, ring, &ctx->solid_prog);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
- OUT_RING(ring, 0x0000ffff);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
- OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
- OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
- A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
- A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
- OUT_RING(ring, xy2d(0, 0)); /* PA_SC_WINDOW_SCISSOR_TL */
- OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
- OUT_RING(ring, fui((float) gmem->bin_w / 2.0)); /* XSCALE */
- OUT_RING(ring, fui((float) gmem->bin_w / 2.0)); /* XOFFSET */
- OUT_RING(ring, fui((float) gmem->bin_h / 2.0)); /* YSCALE */
- OUT_RING(ring, fui((float) gmem->bin_h / 2.0)); /* YOFFSET */
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
- OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));
-
- if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
- emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
-
- if (batch->resolve & FD_BUFFER_COLOR)
- emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
- OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
-
- if (!is_a20x(ctx->screen)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
- OUT_RING(ring, 0x0000003b);
- }
+ struct fd_context *ctx = batch->ctx;
+ struct fd2_context *fd2_ctx = fd2_context(ctx);
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd_ringbuffer *ring;
+
+ batch->tile_fini =
+ fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
+ ring = batch->tile_fini;
+
+ fd2_emit_vertex_bufs(ring, 0x9c,
+ (struct fd2_vertex_buf[]){
+ {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
+ },
+ 1);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
+ OUT_RING(ring, 0x00000000); /* PA_SC_WINDOW_OFFSET */
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
+ OUT_RING(ring, 0);
+
+ if (!is_a20x(ctx->screen)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0x0000028f);
+ }
+
+ fd2_program_emit(ctx, ring, &ctx->solid_prog);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
+ OUT_RING(ring, 0x0000ffff);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
+ OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
+ OUT_RING(
+ ring,
+ A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
+ A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
+ OUT_RING(ring, xy2d(0, 0)); /* PA_SC_WINDOW_SCISSOR_TL */
+ OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
+ OUT_RING(ring, fui((float)gmem->bin_w / 2.0)); /* XSCALE */
+ OUT_RING(ring, fui((float)gmem->bin_w / 2.0)); /* XOFFSET */
+ OUT_RING(ring, fui((float)gmem->bin_h / 2.0)); /* YSCALE */
+ OUT_RING(ring, fui((float)gmem->bin_h / 2.0)); /* YOFFSET */
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
+ OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));
+
+ if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
+ emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
+
+ if (batch->resolve & FD_BUFFER_COLOR)
+ emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
+ OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
+
+ if (!is_a20x(ctx->screen)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0x0000003b);
+ }
}
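
The viewport programmed above sets both scale and offset to bin_w/2 (and bin_h/2), which maps NDC x in [-1, 1] onto [0, bin_w] pixels so the RECTLIST covers exactly one bin. A worked example of that mapping:

#include <assert.h>

/* x_pixel = x_ndc * scale + offset, with scale == offset == bin/2 */
static float
ndc_to_pixels(float ndc, float half_bin)
{
   return ndc * half_bin + half_bin;
}

int
main(void)
{
   assert(ndc_to_pixels(-1.0f, 32.0f) == 0.0f); /* left edge of a 64px bin */
   assert(ndc_to_pixels(1.0f, 32.0f) == 64.0f); /* right edge */
   return 0;
}
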
static void
fd2_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
- fd2_emit_ib(batch->gmem, batch->tile_fini);
+ fd2_emit_ib(batch->gmem, batch->tile_fini);
}
/* transfer from system memory to gmem */
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
- struct pipe_surface *psurf)
+ struct pipe_surface *psurf)
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct fd_resource *rsc = fd_resource(psurf->texture);
- uint32_t offset =
- fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
- enum pipe_format format = fd_gmem_restore_format(psurf->format);
-
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
- OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
- A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
-
- /* emit fb as a texture: */
- OUT_PKT3(ring, CP_SET_CONSTANT, 7);
- OUT_RING(ring, 0x00010000);
- OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
- A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
- A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
- A2XX_SQ_TEX_0_PITCH(fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level)));
- OUT_RELOC(ring, rsc->bo, offset,
- A2XX_SQ_TEX_1_FORMAT(fd2_pipe2surface(format).format) |
- A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL), 0);
- OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
- A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
- OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
- A2XX_SQ_TEX_3_SWIZ_X(0) |
- A2XX_SQ_TEX_3_SWIZ_Y(1) |
- A2XX_SQ_TEX_3_SWIZ_Z(2) |
- A2XX_SQ_TEX_3_SWIZ_W(3) |
- A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
- A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));
-
- if (!is_a20x(batch->ctx->screen)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
- OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
- OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
- }
-
- fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
- DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+ uint32_t offset =
+ fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
+ enum pipe_format format = fd_gmem_restore_format(psurf->format);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
+ OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
+ A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
+
+ /* emit fb as a texture: */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 7);
+ OUT_RING(ring, 0x00010000);
+ OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
+ A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
+ A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
+ A2XX_SQ_TEX_0_PITCH(
+ fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level)));
+ OUT_RELOC(ring, rsc->bo, offset,
+ A2XX_SQ_TEX_1_FORMAT(fd2_pipe2surface(format).format) |
+ A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL),
+ 0);
+ OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
+ A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
+ OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
+ A2XX_SQ_TEX_3_SWIZ_X(0) | A2XX_SQ_TEX_3_SWIZ_Y(1) |
+ A2XX_SQ_TEX_3_SWIZ_Z(2) | A2XX_SQ_TEX_3_SWIZ_W(3) |
+ A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
+ A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));
+
+ if (!is_a20x(batch->ctx->screen)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
+ OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
+ OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
+ }
+
+ fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
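
SQ_TEX_2 carries the surface dimensions biased by one (width - 1, height - 1), so e.g. a 256x256 framebuffer is encoded as 255x255. A small sketch of just the bias, leaving the actual bit packing to the register macros:

#include <assert.h>
#include <stdint.h>

struct tex_dim {
   uint32_t width_m1, height_m1;
};

/* SQ_TEX_2 dimensions minus one, as in the WIDTH/HEIGHT macros above */
static struct tex_dim
sq_tex_2_dims(uint32_t w, uint32_t h)
{
   return (struct tex_dim){w - 1, h - 1};
}

int
main(void)
{
   struct tex_dim d = sq_tex_2_dims(256, 256);
   assert(d.width_m1 == 255 && d.height_m1 == 255);
   return 0;
}
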
static void
-fd2_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
- assert_dt
+fd2_emit_tile_mem2gmem(struct fd_batch *batch,
+ const struct fd_tile *tile) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct fd2_context *fd2_ctx = fd2_context(ctx);
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- unsigned bin_w = tile->bin_w;
- unsigned bin_h = tile->bin_h;
- float x0, y0, x1, y1;
-
- fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
- { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
- { .prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36 },
- }, 2);
-
- /* write texture coordinates to vertexbuf: */
- x0 = ((float)tile->xoff) / ((float)pfb->width);
- x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
- y0 = ((float)tile->yoff) / ((float)pfb->height);
- y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
- OUT_PKT3(ring, CP_MEM_WRITE, 7);
- OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
- OUT_RING(ring, fui(x0));
- OUT_RING(ring, fui(y0));
- OUT_RING(ring, fui(x1));
- OUT_RING(ring, fui(y0));
- OUT_RING(ring, fui(x0));
- OUT_RING(ring, fui(y1));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
- OUT_RING(ring, 0);
-
- fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);
-
- OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
- OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
- OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
- OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
- A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
- A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
- OUT_RING(ring, 0x0000ffff);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
- OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
- A2XX_RB_COLORCONTROL_BLEND_DISABLE |
- A2XX_RB_COLORCONTROL_ROP_CODE(12) |
- A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
- A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
- OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
- A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
- A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
- A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
- A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
- A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
- OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
- xy2d(0,0)); /* PA_SC_WINDOW_SCISSOR_TL */
- OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_WINDOW_SCISSOR_BR */
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
- OUT_RING(ring, fui((float)bin_w/2.0)); /* PA_CL_VPORT_XSCALE */
- OUT_RING(ring, fui((float)bin_w/2.0)); /* PA_CL_VPORT_XOFFSET */
- OUT_RING(ring, fui(-(float)bin_h/2.0)); /* PA_CL_VPORT_YSCALE */
- OUT_RING(ring, fui((float)bin_h/2.0)); /* PA_CL_VPORT_YOFFSET */
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
- OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
- A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT | // XXX check this???
- A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
- OUT_RING(ring, 0x00000000);
-
- if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
- emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
-
- if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
- emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
- OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
- A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
- A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
-
- /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
+ struct fd_context *ctx = batch->ctx;
+ struct fd2_context *fd2_ctx = fd2_context(ctx);
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ unsigned bin_w = tile->bin_w;
+ unsigned bin_h = tile->bin_h;
+ float x0, y0, x1, y1;
+
+ fd2_emit_vertex_bufs(
+ ring, 0x9c,
+ (struct fd2_vertex_buf[]){
+ {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
+ {.prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36},
+ },
+ 2);
+
+ /* write texture coordinates to vertexbuf: */
+ x0 = ((float)tile->xoff) / ((float)pfb->width);
+ x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
+ y0 = ((float)tile->yoff) / ((float)pfb->height);
+ y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
+ OUT_PKT3(ring, CP_MEM_WRITE, 7);
+ OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
+ OUT_RING(ring, fui(x0));
+ OUT_RING(ring, fui(y0));
+ OUT_RING(ring, fui(x1));
+ OUT_RING(ring, fui(y0));
+ OUT_RING(ring, fui(x0));
+ OUT_RING(ring, fui(y1));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
+ OUT_RING(ring, 0);
+
+ fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);
+
+ OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
+ OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
+ OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
+ OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
+ A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
+ OUT_RING(ring, 0x0000ffff);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
+ OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
+ A2XX_RB_COLORCONTROL_BLEND_DISABLE |
+ A2XX_RB_COLORCONTROL_ROP_CODE(12) |
+ A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
+ A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
+ OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
+ A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
+ A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
+ A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
+ A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
+ A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
+ OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
+ xy2d(0, 0)); /* PA_SC_WINDOW_SCISSOR_TL */
+ OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_WINDOW_SCISSOR_BR */
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
+ OUT_RING(ring, fui((float)bin_w / 2.0)); /* PA_CL_VPORT_XSCALE */
+ OUT_RING(ring, fui((float)bin_w / 2.0)); /* PA_CL_VPORT_XOFFSET */
+ OUT_RING(ring, fui(-(float)bin_h / 2.0)); /* PA_CL_VPORT_YSCALE */
+ OUT_RING(ring, fui((float)bin_h / 2.0)); /* PA_CL_VPORT_YOFFSET */
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
+ OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
+ A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT | // XXX check this???
+ A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
+ OUT_RING(ring, 0x00000000);
+
+ if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
+ emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
+
+ if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
+ emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
+ OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
+ A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
+ A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
+
+ /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
}
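
The texture coordinates written into the solid vertexbuf above are the tile's extent normalized by the framebuffer size, so the blit quad samples only this tile's pixels out of the saved framebuffer. A worked computation (numbers illustrative): a 64x64 tile at offset (128, 0) in a 256x256 framebuffer samples x in [0.5, 0.75] and y in [0.0, 0.25].

#include <stdio.h>

int
main(void)
{
   unsigned fb_w = 256, fb_h = 256;         /* framebuffer size */
   unsigned xoff = 128, yoff = 0;           /* tile offset */
   unsigned bin_w = 64, bin_h = 64;         /* tile size */

   float x0 = (float)xoff / fb_w;           /* 0.50 */
   float x1 = (float)(xoff + bin_w) / fb_w; /* 0.75 */
   float y0 = (float)yoff / fb_h;           /* 0.00 */
   float y1 = (float)(yoff + bin_h) / fb_h; /* 0.25 */

   printf("x: [%.2f, %.2f]  y: [%.2f, %.2f]\n", x0, x1, y0, y1);
   return 0;
}
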
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
- unsigned i;
-
- if (!is_a20x(batch->ctx->screen)) {
- /* identical to a3xx */
- for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
- struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
- *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
- }
- util_dynarray_clear(&batch->draw_patches);
- return;
- }
-
- if (vismode == USE_VISIBILITY)
- return;
-
- for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t*); i++) {
- uint32_t *ptr = *util_dynarray_element(&batch->draw_patches, uint32_t*, i);
- unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */
-
- /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
- * replace first two DWORDS with NOP and move the rest down
- * (we don't want to have to move the idx buffer reloc)
- */
- ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
- ptr[1] = 0x00000000;
-
- ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
- ptr[2] = CP_TYPE3_PKT | ((cnt-2) << 16) | (CP_DRAW_INDX << 8);
- ptr[3] = 0x00000000;
- }
+ unsigned i;
+
+ if (!is_a20x(batch->ctx->screen)) {
+ /* identical to a3xx */
+ for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
+ *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
+ }
+ util_dynarray_clear(&batch->draw_patches);
+ return;
+ }
+
+ if (vismode == USE_VISIBILITY)
+ return;
+
+ for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t *); i++) {
+ uint32_t *ptr =
+ *util_dynarray_element(&batch->draw_patches, uint32_t *, i);
+ unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */
+
+ /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
+ * replace first two DWORDS with NOP and move the rest down
+ * (we don't want to have to move the idx buffer reloc)
+ */
+ ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
+ ptr[1] = 0x00000000;
+
+ ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
+ ptr[2] = CP_TYPE3_PKT | ((cnt - 2) << 16) | (CP_DRAW_INDX << 8);
+ ptr[3] = 0x00000000;
+ }
}
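
On a20x the rewrite turns a recorded CP_DRAW_INDX_BIN into a two-dword NOP followed by a CP_DRAW_INDX rebuilt two dwords later, so the index-buffer reloc that follows keeps its position in the command stream. A minimal sketch of that dword surgery (opcode values taken from the adreno PM4 definitions; the recorded header in main() is assumed for illustration):

#include <stdint.h>

#define CP_TYPE3_PKT (0x3u << 30)
#define CP_NOP       0x10
#define CP_DRAW_INDX 0x22

/* mirrors the a20x branch of patch_draws() above */
static void
rewrite_draw(uint32_t *ptr)
{
   unsigned cnt = (ptr[0] >> 16) & 0xfff; /* 5 with idx buffer, 3 without */

   ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8); /* first two dwords become a NOP */
   ptr[1] = 0x00000000;

   ptr[4] = ptr[2] & ~(1u << 14 | 1u << 15); /* drop cull_enable bits */
   ptr[2] = CP_TYPE3_PKT | ((cnt - 2) << 16) | (CP_DRAW_INDX << 8);
   ptr[3] = 0x00000000;
}

int
main(void)
{
   /* hypothetical recorded CP_DRAW_INDX_BIN packet with cnt == 5 and a
    * cull_enable bit set in its third dword */
   uint32_t cmds[8] = {CP_TYPE3_PKT | (5u << 16) | (0x34u << 8), 0,
                       (1u << 14), 0, 0, 0, 0, 0};
   rewrite_draw(cmds);
   return cmds[4] == 0 ? 0 : 1; /* cull bit stripped from the moved dword */
}
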
static void
fd2_emit_sysmem_prep(struct fd_batch *batch)
{
- struct fd_context *ctx = batch->ctx;
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct pipe_surface *psurf = pfb->cbufs[0];
-
- if (!psurf)
- return;
-
- struct fd_resource *rsc = fd_resource(psurf->texture);
- uint32_t offset =
- fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
- uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
-
- assert((pitch & 31) == 0);
- assert((offset & 0xfff) == 0);
-
- fd2_emit_restore(ctx, ring);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
- OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(pitch));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
- OUT_RELOC(ring, rsc->bo, offset,
- COND(!rsc->layout.tile_mode, A2XX_RB_COLOR_INFO_LINEAR) |
- A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
- A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)), 0);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
- OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
- OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
- A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
- OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(0) |
- A2XX_PA_SC_WINDOW_OFFSET_Y(0));
-
- patch_draws(batch, IGNORE_VISIBILITY);
- util_dynarray_clear(&batch->draw_patches);
- util_dynarray_clear(&batch->shader_patches);
+ struct fd_context *ctx = batch->ctx;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct pipe_surface *psurf = pfb->cbufs[0];
+
+ if (!psurf)
+ return;
+
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+ uint32_t offset =
+ fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
+ uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
+
+ assert((pitch & 31) == 0);
+ assert((offset & 0xfff) == 0);
+
+ fd2_emit_restore(ctx, ring);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
+ OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(pitch));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
+ OUT_RELOC(ring, rsc->bo, offset,
+ COND(!rsc->layout.tile_mode, A2XX_RB_COLOR_INFO_LINEAR) |
+ A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
+ A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)),
+ 0);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
+ OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
+ OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
+ A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
+ OUT_RING(ring,
+ A2XX_PA_SC_WINDOW_OFFSET_X(0) | A2XX_PA_SC_WINDOW_OFFSET_Y(0));
+
+ patch_draws(batch, IGNORE_VISIBILITY);
+ util_dynarray_clear(&batch->draw_patches);
+ util_dynarray_clear(&batch->shader_patches);
}
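
The two asserts encode the a2xx sysmem render-target constraints this function relies on: the surface pitch must be a multiple of 32 pixels and the color base address must be 4 KiB aligned. A minimal standalone restatement (helper name hypothetical):

#include <stdbool.h>
#include <stdint.h>

/* hypothetical: mirrors the asserts in fd2_emit_sysmem_prep();
 * pitch is in pixels, offset in bytes from the BO base
 */
static bool
a2xx_sysmem_target_ok(uint32_t pitch, uint32_t offset)
{
   return (pitch & 31) == 0 &&    /* pitch multiple of 32 pixels */
          (offset & 0xfff) == 0;  /* base 4 KiB aligned */
}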
/* before first tile */
static void
-fd2_emit_tile_init(struct fd_batch *batch)
- assert_dt
+fd2_emit_tile_init(struct fd_batch *batch) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
- uint32_t reg;
-
- fd2_emit_restore(ctx, ring);
-
- prepare_tile_fini_ib(batch);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 4);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
- OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */
- OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
- A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
- reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
- if (pfb->zsbuf)
- reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
- OUT_RING(ring, reg); /* RB_DEPTH_INFO */
-
- /* fast clear patches */
- int depth_size = -1;
- int color_size = -1;
-
- if (pfb->cbufs[0])
- color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;
-
- if (pfb->zsbuf)
- depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;
-
- for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
- struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
- uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
- uint32_t size, lines;
-
- /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
- switch (patch->val) {
- case GMEM_PATCH_FASTCLEAR_COLOR:
- size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000);
- lines = size / 1024;
- depth_base = size / 2;
- break;
- case GMEM_PATCH_FASTCLEAR_DEPTH:
- size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000);
- lines = size / 1024;
- color_base = depth_base;
- depth_base = depth_base + size / 2;
- break;
- case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
- lines = align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024;
- break;
- case GMEM_PATCH_RESTORE_INFO:
- patch->cs[0] = gmem->bin_w;
- patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
- A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
- patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
- if (pfb->zsbuf)
- patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
- continue;
- default:
- continue;
- }
-
- patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
- A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
- patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
- A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
- patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
- A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
- }
- util_dynarray_clear(&batch->gmem_patches);
-
- /* set to zero, for some reason hardware doesn't like certain values */
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
- OUT_RING(ring, 0);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
- OUT_RING(ring, 0);
-
- if (use_hw_binning(batch)) {
- /* patch out unneeded memory exports by changing EXEC CF to EXEC_END
- *
- * in the shader compiler, we guarantee that the shader ends with
- * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
- *
- * the since patches point only to dwords and CFs are 1.5 dwords
- * the patch is aligned and might point to a ALLOC CF
- */
- for (int i = 0; i < batch->shader_patches.size / sizeof(void*); i++) {
- instr_cf_t *cf =
- *util_dynarray_element(&batch->shader_patches, instr_cf_t*, i);
- if (cf->opc == ALLOC)
- cf++;
- assert(cf->opc == EXEC);
- assert(cf[ctx->screen->info.num_vsc_pipes*2-2].opc == EXEC_END);
- cf[2*(gmem->num_vsc_pipes-1)].opc = EXEC_END;
- }
-
- patch_draws(batch, USE_VISIBILITY);
-
- /* initialize shader constants for the binning memexport */
- OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
- OUT_RING(ring, 0x0000000C);
-
- for (int i = 0; i < gmem->num_vsc_pipes; i++) {
- /* allocate in 64k increments to avoid reallocs */
- uint32_t bo_size = align(batch->num_vertices, 0x10000);
- if (!ctx->vsc_pipe_bo[i] || fd_bo_size(ctx->vsc_pipe_bo[i]) < bo_size) {
- if (ctx->vsc_pipe_bo[i])
- fd_bo_del(ctx->vsc_pipe_bo[i]);
- ctx->vsc_pipe_bo[i] = fd_bo_new(ctx->dev, bo_size,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
- assert(ctx->vsc_pipe_bo[i]);
- }
-
- /* memory export address (export32):
- * .x: (base_address >> 2) | 0x40000000 (?)
- * .y: index (float) - set by shader
- * .z: 0x4B00D000 (?)
- * .w: 0x4B000000 (?) | max_index (?)
- */
- OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0x40000000, -2);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x4B00D000);
- OUT_RING(ring, 0x4B000000 | bo_size);
- }
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
- OUT_RING(ring, 0x0000018C);
-
- for (int i = 0; i < gmem->num_vsc_pipes; i++) {
- const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
- float off_x, off_y, mul_x, mul_y;
-
- /* const to tranform from [-1,1] to bin coordinates for this pipe
- * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
- * 8 possible values on x/y axis,
- * to clip at binning stage: only use center 6x6
- * TODO: set the z parameters too so that hw binning
- * can clip primitives in Z too
- */
-
- mul_x = 1.0f / (float) (gmem->bin_w * 8);
- mul_y = 1.0f / (float) (gmem->bin_h * 8);
- off_x = -pipe->x * (1.0/8.0f) + 0.125f - mul_x * gmem->minx;
- off_y = -pipe->y * (1.0/8.0f) + 0.125f - mul_y * gmem->miny;
-
- OUT_RING(ring, fui(off_x * (256.0f/255.0f)));
- OUT_RING(ring, fui(off_y * (256.0f/255.0f)));
- OUT_RING(ring, 0x3f000000);
- OUT_RING(ring, fui(0.0f));
-
- OUT_RING(ring, fui(mul_x * (256.0f/255.0f)));
- OUT_RING(ring, fui(mul_y * (256.0f/255.0f)));
- OUT_RING(ring, fui(0.0f));
- OUT_RING(ring, fui(0.0f));
- }
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
- OUT_RING(ring, 0);
-
- fd2_emit_ib(ring, batch->binning);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
- OUT_RING(ring, 0x00000002);
- } else {
- patch_draws(batch, IGNORE_VISIBILITY);
- }
-
- util_dynarray_clear(&batch->draw_patches);
- util_dynarray_clear(&batch->shader_patches);
+ struct fd_context *ctx = batch->ctx;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
+ uint32_t reg;
+
+ fd2_emit_restore(ctx, ring);
+
+ prepare_tile_fini_ib(batch);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 4);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
+ OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */
+ OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
+ A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
+ reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
+ if (pfb->zsbuf)
+ reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
+ OUT_RING(ring, reg); /* RB_DEPTH_INFO */
+
+ /* fast clear patches */
+ int depth_size = -1;
+ int color_size = -1;
+
+ if (pfb->cbufs[0])
+ color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;
+
+ if (pfb->zsbuf)
+ depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;
+
+ for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
+ uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
+ uint32_t size, lines;
+
+ /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
+ switch (patch->val) {
+ case GMEM_PATCH_FASTCLEAR_COLOR:
+ size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000);
+ lines = size / 1024;
+ depth_base = size / 2;
+ break;
+ case GMEM_PATCH_FASTCLEAR_DEPTH:
+ size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000);
+ lines = size / 1024;
+ color_base = depth_base;
+ depth_base = depth_base + size / 2;
+ break;
+ case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
+ lines =
+ align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024;
+ break;
+ case GMEM_PATCH_RESTORE_INFO:
+ patch->cs[0] = gmem->bin_w;
+ patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
+ A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
+ patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
+ if (pfb->zsbuf)
+ patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(
+ fd_pipe2depth(pfb->zsbuf->format));
+ continue;
+ default:
+ continue;
+ }
+
+ patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
+ A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
+ patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
+ A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
+ patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
+ A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
+ }
+ util_dynarray_clear(&batch->gmem_patches);
+
+   /* set to zero; for some reason the hardware doesn't like certain values */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
+ OUT_RING(ring, 0);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
+ OUT_RING(ring, 0);
+
+ if (use_hw_binning(batch)) {
+ /* patch out unneeded memory exports by changing EXEC CF to EXEC_END
+ *
+ * in the shader compiler, we guarantee that the shader ends with
+ * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
+ *
+    * since the patches point only to dwords and CFs are 1.5 dwords,
+    * the patch is aligned and might point to an ALLOC CF
+ */
+ for (int i = 0; i < batch->shader_patches.size / sizeof(void *); i++) {
+ instr_cf_t *cf =
+ *util_dynarray_element(&batch->shader_patches, instr_cf_t *, i);
+ if (cf->opc == ALLOC)
+ cf++;
+ assert(cf->opc == EXEC);
+ assert(cf[ctx->screen->info.num_vsc_pipes * 2 - 2].opc == EXEC_END);
+ cf[2 * (gmem->num_vsc_pipes - 1)].opc = EXEC_END;
+ }
+
+ patch_draws(batch, USE_VISIBILITY);
+
+ /* initialize shader constants for the binning memexport */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
+ OUT_RING(ring, 0x0000000C);
+
+ for (int i = 0; i < gmem->num_vsc_pipes; i++) {
+ /* allocate in 64k increments to avoid reallocs */
+ uint32_t bo_size = align(batch->num_vertices, 0x10000);
+ if (!ctx->vsc_pipe_bo[i] ||
+ fd_bo_size(ctx->vsc_pipe_bo[i]) < bo_size) {
+ if (ctx->vsc_pipe_bo[i])
+ fd_bo_del(ctx->vsc_pipe_bo[i]);
+ ctx->vsc_pipe_bo[i] =
+ fd_bo_new(ctx->dev, bo_size, DRM_FREEDRENO_GEM_TYPE_KMEM,
+ "vsc_pipe[%u]", i);
+ assert(ctx->vsc_pipe_bo[i]);
+ }
+
+ /* memory export address (export32):
+ * .x: (base_address >> 2) | 0x40000000 (?)
+ * .y: index (float) - set by shader
+ * .z: 0x4B00D000 (?)
+ * .w: 0x4B000000 (?) | max_index (?)
+ */
+ OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0x40000000, -2);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x4B00D000);
+ OUT_RING(ring, 0x4B000000 | bo_size);
+ }
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
+ OUT_RING(ring, 0x0000018C);
+
+ for (int i = 0; i < gmem->num_vsc_pipes; i++) {
+ const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
+ float off_x, off_y, mul_x, mul_y;
+
+       /* const to transform from [-1,1] to bin coordinates for this pipe
+ * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
+ * 8 possible values on x/y axis,
+ * to clip at binning stage: only use center 6x6
+ * TODO: set the z parameters too so that hw binning
+ * can clip primitives in Z too
+ */
+
+ mul_x = 1.0f / (float)(gmem->bin_w * 8);
+ mul_y = 1.0f / (float)(gmem->bin_h * 8);
+ off_x = -pipe->x * (1.0 / 8.0f) + 0.125f - mul_x * gmem->minx;
+ off_y = -pipe->y * (1.0 / 8.0f) + 0.125f - mul_y * gmem->miny;
+
+ OUT_RING(ring, fui(off_x * (256.0f / 255.0f)));
+ OUT_RING(ring, fui(off_y * (256.0f / 255.0f)));
+ OUT_RING(ring, 0x3f000000);
+ OUT_RING(ring, fui(0.0f));
+
+ OUT_RING(ring, fui(mul_x * (256.0f / 255.0f)));
+ OUT_RING(ring, fui(mul_y * (256.0f / 255.0f)));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(0.0f));
+ }
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0);
+
+ fd2_emit_ib(ring, batch->binning);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0x00000002);
+ } else {
+ patch_draws(batch, IGNORE_VISIBILITY);
+ }
+
+ util_dynarray_clear(&batch->draw_patches);
+ util_dynarray_clear(&batch->shader_patches);
}
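
A worked example of the fast-clear sizing above (numbers illustrative): for a 256x128 bin with a 32bpp color buffer, color_size is 4, so for GMEM_PATCH_FASTCLEAR_COLOR:

/* size       = align(256 * 128 * 4, 0x8000) = 0x20000 bytes
 * lines      = 0x20000 / 1024 = 128  (one "line" = 512 B color + 512 B depth)
 * depth_base = 0x20000 / 2 = 0x10000 (assumed: keeps the dummy depth area
 *                                     clear of the color data being cleared)
 */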
/* before mem2gmem */
static void
fd2_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
- OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
- A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
-
- /* setup screen scissor for current tile (same for mem2gmem): */
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
- OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
- A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
- OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
- A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
+ OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
+ A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
+
+ /* setup screen scissor for current tile (same for mem2gmem): */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
+ OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
+ A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
+ A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
}
/* before IB to rendering cmds: */
static void
-fd2_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
- assert_dt
+fd2_emit_tile_renderprep(struct fd_batch *batch,
+ const struct fd_tile *tile) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct fd2_context *fd2_ctx = fd2_context(ctx);
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
- OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
- A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
-
- /* setup window scissor and offset for current tile (different
- * from mem2gmem):
- */
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
- OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
- A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
-
- /* write SCISSOR_BR to memory so fast clear path can restore from it */
- OUT_PKT3(ring, CP_MEM_WRITE, 2);
- OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
- OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
- A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
-
- /* set the copy offset for gmem2mem */
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
- OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
- A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));
-
- /* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
- if (is_a20x(ctx->screen)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, 0x00000580);
- OUT_RING(ring, fui(tile->xoff));
- OUT_RING(ring, fui(tile->yoff));
- OUT_RING(ring, fui(0.0f));
- OUT_RING(ring, fui(0.0f));
- }
-
- if (use_hw_binning(batch)) {
- struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
- OUT_RING(ring, tile->n);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
- OUT_RING(ring, tile->n);
-
- /* TODO only emit this when tile->p changes */
- OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
- OUT_RELOC(ring, pipe_bo, 0, 0, 0);
- }
+ struct fd_context *ctx = batch->ctx;
+ struct fd2_context *fd2_ctx = fd2_context(ctx);
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
+ OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
+ A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
+
+ /* setup window scissor and offset for current tile (different
+ * from mem2gmem):
+ */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
+ OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
+ A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
+
+ /* write SCISSOR_BR to memory so fast clear path can restore from it */
+ OUT_PKT3(ring, CP_MEM_WRITE, 2);
+ OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
+ OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
+ A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
+
+ /* set the copy offset for gmem2mem */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
+ OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
+ A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));
+
+ /* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
+ if (is_a20x(ctx->screen)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, 0x00000580);
+ OUT_RING(ring, fui(tile->xoff));
+ OUT_RING(ring, fui(tile->yoff));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(0.0f));
+ }
+
+ if (use_hw_binning(batch)) {
+ struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
+ OUT_RING(ring, tile->n);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
+ OUT_RING(ring, tile->n);
+
+ /* TODO only emit this when tile->p changes */
+ OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
+ OUT_RELOC(ring, pipe_bo, 0, 0, 0);
+ }
}
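
The sign flip on the window offset is what remaps a tile's screen coordinates into its GMEM-local bin; a worked example for a tile at (xoff, yoff) = (256, 128):

/* PA_SC_WINDOW_OFFSET  = (-256, -128): a fragment at screen (300, 140)
 *                        rasterizes at bin-local (44, 12)
 * RB_COPY_DEST_OFFSET  = ( 256,  128): gmem2mem resolves bin-local (44, 12)
 *                        back to (300, 140) in the destination surface
 */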
void
-fd2_gmem_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd2_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
-
- ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
- ctx->emit_tile_init = fd2_emit_tile_init;
- ctx->emit_tile_prep = fd2_emit_tile_prep;
- ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
- ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
- ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
+ struct fd_context *ctx = fd_context(pctx);
+
+ ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
+ ctx->emit_tile_init = fd2_emit_tile_init;
+ ctx->emit_tile_prep = fd2_emit_tile_prep;
+ ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
+ ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
+ ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
}
* Jonathan Marek <jonathan@marek.ca>
*/
+#include "nir/tgsi_to_nir.h"
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "util/format/u_format.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
-#include "nir/tgsi_to_nir.h"
+#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "freedreno_program.h"
-#include "ir2.h"
+#include "ir2/instr-a2xx.h"
#include "fd2_program.h"
#include "fd2_texture.h"
#include "fd2_util.h"
-#include "ir2/instr-a2xx.h"
+#include "ir2.h"
static struct fd2_shader_stateobj *
create_shader(struct pipe_context *pctx, gl_shader_stage type)
{
- struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);
- if (!so)
- return NULL;
- so->type = type;
- so->is_a20x = is_a20x(fd_context(pctx)->screen);
- return so;
+ struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);
+ if (!so)
+ return NULL;
+ so->type = type;
+ so->is_a20x = is_a20x(fd_context(pctx)->screen);
+ return so;
}
static void
delete_shader(struct fd2_shader_stateobj *so)
{
- if (!so)
- return;
- ralloc_free(so->nir);
- for (int i = 0; i < ARRAY_SIZE(so->variant); i++)
- free(so->variant[i].info.dwords);
- free(so);
+ if (!so)
+ return;
+ ralloc_free(so->nir);
+ for (int i = 0; i < ARRAY_SIZE(so->variant); i++)
+ free(so->variant[i].info.dwords);
+ free(so);
}
static void
emit(struct fd_ringbuffer *ring, gl_shader_stage type,
- struct ir2_shader_info *info, struct util_dynarray *patches)
+ struct ir2_shader_info *info, struct util_dynarray *patches)
{
- unsigned i;
+ unsigned i;
- assert(info->sizedwords);
+ assert(info->sizedwords);
- OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords);
- OUT_RING(ring, type == MESA_SHADER_FRAGMENT);
- OUT_RING(ring, info->sizedwords);
+ OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords);
+ OUT_RING(ring, type == MESA_SHADER_FRAGMENT);
+ OUT_RING(ring, info->sizedwords);
- if (patches)
- util_dynarray_append(patches, uint32_t*, &ring->cur[info->mem_export_ptr]);
+ if (patches)
+ util_dynarray_append(patches, uint32_t *,
+ &ring->cur[info->mem_export_ptr]);
- for (i = 0; i < info->sizedwords; i++)
- OUT_RING(ring, info->dwords[i]);
+ for (i = 0; i < info->sizedwords; i++)
+ OUT_RING(ring, info->dwords[i]);
}
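
For orientation, the CP_IM_LOAD_IMMEDIATE packet built here carries a stage selector, a size, and then the raw shader instructions, and the pointer recorded in patches lands inside that payload (a sketch; header encoding left abstract):

/* hdr    CP_IM_LOAD_IMMEDIATE, 2 + sizedwords payload dwords
 * [0]    0 for MESA_SHADER_VERTEX, 1 for MESA_SHADER_FRAGMENT
 * [1]    info->sizedwords
 * [2+i]  info->dwords[i]
 *
 * &ring->cur[info->mem_export_ptr] points at one of the shader dwords,
 * which fd2_emit_tile_init() later rewrites (EXEC -> EXEC_END) to drop
 * the unneeded binning memory exports.
 */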
static int
ir2_glsl_type_size(const struct glsl_type *type, bool bindless)
{
- return glsl_count_attribute_slots(type, false);
+ return glsl_count_attribute_slots(type, false);
}
static void *
fd2_fp_state_create(struct pipe_context *pctx,
- const struct pipe_shader_state *cso)
+ const struct pipe_shader_state *cso)
{
- struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT);
- if (!so)
- return NULL;
+ struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT);
+ if (!so)
+ return NULL;
- so->nir = (cso->type == PIPE_SHADER_IR_NIR) ? cso->ir.nir :
- tgsi_to_nir(cso->tokens, pctx->screen, false);
+ so->nir = (cso->type == PIPE_SHADER_IR_NIR)
+ ? cso->ir.nir
+ : tgsi_to_nir(cso->tokens, pctx->screen, false);
- NIR_PASS_V(so->nir, nir_lower_io,
- nir_var_shader_in | nir_var_shader_out,
- ir2_glsl_type_size, (nir_lower_io_options)0);
+ NIR_PASS_V(so->nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
+ ir2_glsl_type_size, (nir_lower_io_options)0);
- if (ir2_optimize_nir(so->nir, true))
- goto fail;
+ if (ir2_optimize_nir(so->nir, true))
+ goto fail;
- so->first_immediate = so->nir->num_uniforms;
+ so->first_immediate = so->nir->num_uniforms;
- ir2_compile(so, 0, NULL);
+ ir2_compile(so, 0, NULL);
- ralloc_free(so->nir);
- so->nir = NULL;
- return so;
+ ralloc_free(so->nir);
+ so->nir = NULL;
+ return so;
fail:
- delete_shader(so);
- return NULL;
+ delete_shader(so);
+ return NULL;
}
static void
fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
- struct fd2_shader_stateobj *so = hwcso;
- delete_shader(so);
+ struct fd2_shader_stateobj *so = hwcso;
+ delete_shader(so);
}
static void *
fd2_vp_state_create(struct pipe_context *pctx,
- const struct pipe_shader_state *cso)
+ const struct pipe_shader_state *cso)
{
- struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX);
- if (!so)
- return NULL;
+ struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX);
+ if (!so)
+ return NULL;
- so->nir = (cso->type == PIPE_SHADER_IR_NIR) ? cso->ir.nir :
- tgsi_to_nir(cso->tokens, pctx->screen, false);
+ so->nir = (cso->type == PIPE_SHADER_IR_NIR)
+ ? cso->ir.nir
+ : tgsi_to_nir(cso->tokens, pctx->screen, false);
- NIR_PASS_V(so->nir, nir_lower_io,
- nir_var_shader_in | nir_var_shader_out,
- ir2_glsl_type_size, (nir_lower_io_options)0);
+ NIR_PASS_V(so->nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
+ ir2_glsl_type_size, (nir_lower_io_options)0);
- if (ir2_optimize_nir(so->nir, true))
- goto fail;
+ if (ir2_optimize_nir(so->nir, true))
+ goto fail;
- so->first_immediate = so->nir->num_uniforms;
+ so->first_immediate = so->nir->num_uniforms;
- /* compile binning variant now */
- ir2_compile(so, 0, NULL);
+ /* compile binning variant now */
+ ir2_compile(so, 0, NULL);
- return so;
+ return so;
fail:
- delete_shader(so);
- return NULL;
+ delete_shader(so);
+ return NULL;
}
static void
fd2_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
- struct fd2_shader_stateobj *so = hwcso;
- delete_shader(so);
+ struct fd2_shader_stateobj *so = hwcso;
+ delete_shader(so);
}
static void
patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem,
- instr_fetch_vtx_t *instr, uint16_t dst_swiz)
- assert_dt
+ instr_fetch_vtx_t *instr, uint16_t dst_swiz) assert_dt
{
- struct surface_format fmt = fd2_pipe2surface(elem->src_format);
-
- instr->dst_swiz = fd2_vtx_swiz(elem->src_format, dst_swiz);
- instr->format_comp_all = fmt.sign == SQ_TEX_SIGN_SIGNED;
- instr->num_format_all = fmt.num_format;
- instr->format = fmt.format;
- instr->exp_adjust_all = fmt.exp_adjust;
- instr->stride = ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index].stride;
- instr->offset = elem->src_offset;
+ struct surface_format fmt = fd2_pipe2surface(elem->src_format);
+
+ instr->dst_swiz = fd2_vtx_swiz(elem->src_format, dst_swiz);
+ instr->format_comp_all = fmt.sign == SQ_TEX_SIGN_SIGNED;
+ instr->num_format_all = fmt.num_format;
+ instr->format = fmt.format;
+ instr->exp_adjust_all = fmt.exp_adjust;
+ instr->stride = ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index].stride;
+ instr->offset = elem->src_offset;
}
static void
patch_fetches(struct fd_context *ctx, struct ir2_shader_info *info,
- struct fd_vertex_stateobj *vtx, struct fd_texture_stateobj *tex)
- assert_dt
+ struct fd_vertex_stateobj *vtx,
+ struct fd_texture_stateobj *tex) assert_dt
{
- for (int i = 0; i < info->num_fetch_instrs; i++) {
- struct ir2_fetch_info *fi = &info->fetch_info[i];
-
- instr_fetch_t *instr = (instr_fetch_t*) &info->dwords[fi->offset];
- if (instr->opc == VTX_FETCH) {
- unsigned idx = (instr->vtx.const_index - 20) * 3 +
- instr->vtx.const_index_sel;
- patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz);
- continue;
- }
-
- assert(instr->opc == TEX_FETCH);
- instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id);
- instr->tex.src_swiz = fi->tex.src_swiz;
- }
+ for (int i = 0; i < info->num_fetch_instrs; i++) {
+ struct ir2_fetch_info *fi = &info->fetch_info[i];
+
+ instr_fetch_t *instr = (instr_fetch_t *)&info->dwords[fi->offset];
+ if (instr->opc == VTX_FETCH) {
+ unsigned idx =
+ (instr->vtx.const_index - 20) * 3 + instr->vtx.const_index_sel;
+ patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz);
+ continue;
+ }
+
+ assert(instr->opc == TEX_FETCH);
+ instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id);
+ instr->tex.src_swiz = fi->tex.src_swiz;
+ }
}
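
The vertex-fetch constant arithmetic above can be checked by hand: the formula implies vertex buffer descriptors are packed three per fetch-constant slot, with the slots starting at constant index 20, so for example:

/* const_index = 21, const_index_sel = 2
 *   idx = (21 - 20) * 3 + 2 = 5   ->  patches vtx->pipe[5]
 */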
void
fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog)
+ struct fd_program_stateobj *prog)
{
- struct fd2_shader_stateobj *fp = NULL, *vp;
- struct ir2_shader_info *fpi, *vpi;
- struct ir2_frag_linkage *f;
- uint8_t vs_gprs, fs_gprs = 0, vs_export = 0;
- enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR;
- bool binning = (ctx->batch && ring == ctx->batch->binning);
- unsigned variant = 0;
-
- vp = prog->vs;
-
- /* find variant matching the linked fragment shader */
- if (!binning) {
- fp = prog->fs;
- for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) {
- /* if checked all variants, compile a new variant */
- if (!vp->variant[variant].info.sizedwords) {
- ir2_compile(vp, variant, fp);
- break;
- }
-
- /* check if fragment shader linkage matches */
- if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f,
- sizeof(struct ir2_frag_linkage)))
- break;
- }
- assert(variant < ARRAY_SIZE(vp->variant));
- }
-
- vpi = &vp->variant[variant].info;
- fpi = &fp->variant[0].info;
- f = &fp->variant[0].f;
-
- /* clear/gmem2mem/mem2gmem need to be changed to remove this condition */
- if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) {
- patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]);
- if (fp)
- patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]);
- }
-
- emit(ring, MESA_SHADER_VERTEX, vpi,
- binning ? &ctx->batch->shader_patches : NULL);
-
- if (fp) {
- emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL);
- fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg;
- vs_export = MAX2(1, f->inputs_count) - 1;
- }
-
- vs_gprs = (vpi->max_reg < 0) ? 0x80 : vpi->max_reg;
-
- if (vp->writes_psize && !binning)
- mode = POSITION_2_VECTORS_SPRITE;
-
- /* set register to use for param (fragcoord/pointcoord/frontfacing) */
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
- OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) |
- COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) |
- /* we need SCREEN_XY for both fragcoord and frontfacing */
- A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
- OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) |
- A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) |
- A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
- A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
- A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
- A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
- A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) |
- COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) |
- COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX));
+ struct fd2_shader_stateobj *fp = NULL, *vp;
+ struct ir2_shader_info *fpi, *vpi;
+ struct ir2_frag_linkage *f;
+ uint8_t vs_gprs, fs_gprs = 0, vs_export = 0;
+ enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR;
+ bool binning = (ctx->batch && ring == ctx->batch->binning);
+ unsigned variant = 0;
+
+ vp = prog->vs;
+
+ /* find variant matching the linked fragment shader */
+ if (!binning) {
+ fp = prog->fs;
+ for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) {
+      /* reached an empty slot: no existing variant matched, compile a new one */
+ if (!vp->variant[variant].info.sizedwords) {
+ ir2_compile(vp, variant, fp);
+ break;
+ }
+
+ /* check if fragment shader linkage matches */
+ if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f,
+ sizeof(struct ir2_frag_linkage)))
+ break;
+ }
+ assert(variant < ARRAY_SIZE(vp->variant));
+ }
+
+ vpi = &vp->variant[variant].info;
+ fpi = &fp->variant[0].info;
+ f = &fp->variant[0].f;
+
+ /* clear/gmem2mem/mem2gmem need to be changed to remove this condition */
+ if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) {
+ patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]);
+ if (fp)
+ patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]);
+ }
+
+ emit(ring, MESA_SHADER_VERTEX, vpi,
+ binning ? &ctx->batch->shader_patches : NULL);
+
+ if (fp) {
+ emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL);
+ fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg;
+ vs_export = MAX2(1, f->inputs_count) - 1;
+ }
+
+ vs_gprs = (vpi->max_reg < 0) ? 0x80 : vpi->max_reg;
+
+ if (vp->writes_psize && !binning)
+ mode = POSITION_2_VECTORS_SPRITE;
+
+ /* set register to use for param (fragcoord/pointcoord/frontfacing) */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
+ OUT_RING(ring,
+ A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) |
+ COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) |
+ /* we need SCREEN_XY for both fragcoord and frontfacing */
+ A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
+ OUT_RING(ring,
+ A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) |
+ A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) |
+ A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
+ A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
+ A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
+ A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
+ A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) |
+ COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) |
+ COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX));
}
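
Two of the SQ_PROGRAM_CNTL fields above use non-obvious encodings, presumably a minus-one count and a no-registers sentinel; illustrative values:

/* f->inputs_count = 3   ->  vs_export = MAX2(1, 3) - 1 = 2
 * f->inputs_count = 0   ->  vs_export = MAX2(1, 0) - 1 = 0
 * vpi->max_reg    = -1  ->  vs_gprs   = 0x80  (assumed: "no GPRs" sentinel)
 * vpi->max_reg    = 4   ->  vs_gprs   = 4     (highest GPR index used)
 */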
void
fd2_prog_init(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_program_stateobj *prog;
- struct fd2_shader_stateobj *so;
- struct ir2_shader_info *info;
- instr_fetch_vtx_t *instr;
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_program_stateobj *prog;
+ struct fd2_shader_stateobj *so;
+ struct ir2_shader_info *info;
+ instr_fetch_vtx_t *instr;
- pctx->create_fs_state = fd2_fp_state_create;
- pctx->delete_fs_state = fd2_fp_state_delete;
+ pctx->create_fs_state = fd2_fp_state_create;
+ pctx->delete_fs_state = fd2_fp_state_delete;
- pctx->create_vs_state = fd2_vp_state_create;
- pctx->delete_vs_state = fd2_vp_state_delete;
+ pctx->create_vs_state = fd2_vp_state_create;
+ pctx->delete_vs_state = fd2_vp_state_delete;
- fd_prog_init(pctx);
+ fd_prog_init(pctx);
- /* XXX maybe its possible to reuse patch_vtx_fetch somehow? */
+   /* XXX maybe it's possible to reuse patch_vtx_fetch somehow? */
- prog = &ctx->solid_prog;
- so = prog->vs;
- ir2_compile(prog->vs, 1, prog->fs);
+ prog = &ctx->solid_prog;
+ so = prog->vs;
+ ir2_compile(prog->vs, 1, prog->fs);
#define IR2_FETCH_SWIZ_XY01 0xb08
#define IR2_FETCH_SWIZ_XYZ1 0xa88
- info = &so->variant[1].info;
-
- instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset];
- instr->const_index = 26;
- instr->const_index_sel = 0;
- instr->format = FMT_32_32_32_FLOAT;
- instr->format_comp_all = false;
- instr->stride = 12;
- instr->num_format_all = true;
- instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
-
- prog = &ctx->blit_prog[0];
- so = prog->vs;
- ir2_compile(prog->vs, 1, prog->fs);
-
- info = &so->variant[1].info;
-
- instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset];
- instr->const_index = 26;
- instr->const_index_sel = 1;
- instr->format = FMT_32_32_FLOAT;
- instr->format_comp_all = false;
- instr->stride = 8;
- instr->num_format_all = false;
- instr->dst_swiz = IR2_FETCH_SWIZ_XY01;
-
- instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[1].offset];
- instr->const_index = 26;
- instr->const_index_sel = 0;
- instr->format = FMT_32_32_32_FLOAT;
- instr->format_comp_all = false;
- instr->stride = 12;
- instr->num_format_all = false;
- instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
+ info = &so->variant[1].info;
+
+ instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[0].offset];
+ instr->const_index = 26;
+ instr->const_index_sel = 0;
+ instr->format = FMT_32_32_32_FLOAT;
+ instr->format_comp_all = false;
+ instr->stride = 12;
+ instr->num_format_all = true;
+ instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
+
+ prog = &ctx->blit_prog[0];
+ so = prog->vs;
+ ir2_compile(prog->vs, 1, prog->fs);
+
+ info = &so->variant[1].info;
+
+ instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[0].offset];
+ instr->const_index = 26;
+ instr->const_index_sel = 1;
+ instr->format = FMT_32_32_FLOAT;
+ instr->format_comp_all = false;
+ instr->stride = 8;
+ instr->num_format_all = false;
+ instr->dst_swiz = IR2_FETCH_SWIZ_XY01;
+
+ instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[1].offset];
+ instr->const_index = 26;
+ instr->const_index_sel = 0;
+ instr->format = FMT_32_32_32_FLOAT;
+ instr->format_comp_all = false;
+ instr->stride = 12;
+ instr->num_format_all = false;
+ instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
}
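
The two swizzle defines decode as four 3-bit destination selectors, LSB-first, where 0-3 pick X/Y/Z/W and (per the usual a2xx fetch encoding, stated here as an assumption) 4 and 5 pick the constants 0 and 1:

/* 0xa88 = 101 010 001 000b -> (w=1, z=z, y=y, x=x) : IR2_FETCH_SWIZ_XYZ1
 * 0xb08 = 101 100 001 000b -> (w=1, z=0, y=y, x=x) : IR2_FETCH_SWIZ_XY01
 *          w   z   y   x      (3 bits per component, lowest bits = dst.x)
 */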
#include "freedreno_context.h"
-#include "ir2.h"
#include "disasm.h"
+#include "ir2.h"
struct fd2_shader_stateobj {
- nir_shader *nir;
- gl_shader_stage type;
- bool is_a20x;
+ nir_shader *nir;
+ gl_shader_stage type;
+ bool is_a20x;
- /* note: using same set of immediates for all variants
- * it doesn't matter, other than the slightly larger command stream
- */
- unsigned first_immediate; /* const reg # of first immediate */
- unsigned num_immediates;
- struct {
- uint32_t val[4];
- unsigned ncomp;
- } immediates[64];
+   /* note: we use the same set of immediates for all variants;
+    * it doesn't matter, other than a slightly larger command stream
+    */
+ unsigned first_immediate; /* const reg # of first immediate */
+ unsigned num_immediates;
+ struct {
+ uint32_t val[4];
+ unsigned ncomp;
+ } immediates[64];
- bool writes_psize;
- bool need_param;
- bool has_kill;
+ bool writes_psize;
+ bool need_param;
+ bool has_kill;
- /* note:
- * fragment shader only has one variant
- * first vertex shader variant is always binning shader
- * we should use a dynamic array but in normal case there is
- * only 2 variants (and 3 sometimes with GALLIUM_HUD)
- */
- struct ir2_shader_variant variant[8];
+ /* note:
+ * fragment shader only has one variant
+ * first vertex shader variant is always binning shader
+    * we should use a dynamic array but in the normal case there are
+    * only 2 variants (and sometimes 3 with GALLIUM_HUD)
+ */
+ struct ir2_shader_variant variant[8];
};
void fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog) assert_dt;
+ struct fd_program_stateobj *prog) assert_dt;
void fd2_prog_init(struct pipe_context *pctx);
#include "fd2_query.h"
struct PACKED fd2_query_sample {
- uint32_t start;
- uint32_t stop;
+ uint32_t start;
+ uint32_t stop;
};
/* offset of a single field of an array of fd2_query_sample: */
-#define query_sample_idx(aq, idx, field) \
- fd_resource((aq)->prsc)->bo, \
- (idx * sizeof(struct fd2_query_sample)) + \
- offsetof(struct fd2_query_sample, field), \
- 0, 0
+#define query_sample_idx(aq, idx, field) \
+ fd_resource((aq)->prsc)->bo, \
+ (idx * sizeof(struct fd2_query_sample)) + \
+ offsetof(struct fd2_query_sample, field), \
+ 0, 0
/* offset of a single field of fd2_query_sample: */
-#define query_sample(aq, field) \
- query_sample_idx(aq, 0, field)
+#define query_sample(aq, field) query_sample_idx(aq, 0, field)
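
Since query_sample_idx() expands to an argument list rather than a single expression, call sites read unusually; the start-snapshot emit further down, for instance, expands to:

/* OUT_RELOC(ring, query_sample_idx(aq, i, start)) becomes: */
OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,
          (i * sizeof(struct fd2_query_sample)) +
             offsetof(struct fd2_query_sample, start),
          0, 0); /* trailing args: reloc or-value and shift */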
/*
* Performance Counter (batch) queries:
*/
struct fd_batch_query_entry {
- uint8_t gid; /* group-id */
- uint8_t cid; /* countable-id within the group */
+ uint8_t gid; /* group-id */
+ uint8_t cid; /* countable-id within the group */
};
struct fd_batch_query_data {
- struct fd_screen *screen;
- unsigned num_query_entries;
- struct fd_batch_query_entry query_entries[];
+ struct fd_screen *screen;
+ unsigned num_query_entries;
+ struct fd_batch_query_entry query_entries[];
};
static void
-perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
- struct fd_batch_query_data *data = aq->query_data;
- struct fd_screen *screen = data->screen;
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd_screen *screen = data->screen;
+ struct fd_ringbuffer *ring = batch->draw;
- unsigned counters_per_group[screen->num_perfcntr_groups];
- memset(counters_per_group, 0, sizeof(counters_per_group));
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
- fd_wfi(batch, ring);
+ fd_wfi(batch, ring);
- /* configure performance counters for the requested queries: */
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
- unsigned counter_idx = counters_per_group[entry->gid]++;
+ /* configure performance counters for the requested queries: */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
- debug_assert(counter_idx < g->num_counters);
+ debug_assert(counter_idx < g->num_counters);
- OUT_PKT0(ring, g->counters[counter_idx].select_reg, 1);
- OUT_RING(ring, g->countables[entry->cid].selector);
- }
+ OUT_PKT0(ring, g->counters[counter_idx].select_reg, 1);
+ OUT_RING(ring, g->countables[entry->cid].selector);
+ }
- memset(counters_per_group, 0, sizeof(counters_per_group));
+ memset(counters_per_group, 0, sizeof(counters_per_group));
- /* and snapshot the start values */
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
- unsigned counter_idx = counters_per_group[entry->gid]++;
- const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+ /* and snapshot the start values */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
+ const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
- OUT_PKT3(ring, CP_REG_TO_MEM, 2);
- OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE);
- OUT_RELOC(ring, query_sample_idx(aq, i, start));
- }
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE);
+ OUT_RELOC(ring, query_sample_idx(aq, i, start));
+ }
}
static void
-perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
- struct fd_batch_query_data *data = aq->query_data;
- struct fd_screen *screen = data->screen;
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd_screen *screen = data->screen;
+ struct fd_ringbuffer *ring = batch->draw;
- unsigned counters_per_group[screen->num_perfcntr_groups];
- memset(counters_per_group, 0, sizeof(counters_per_group));
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
- fd_wfi(batch, ring);
+ fd_wfi(batch, ring);
- /* TODO do we need to bother to turn anything off? */
+ /* TODO do we need to bother to turn anything off? */
- /* snapshot the end values: */
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
- unsigned counter_idx = counters_per_group[entry->gid]++;
- const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+ /* snapshot the end values: */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
+ const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
- OUT_PKT3(ring, CP_REG_TO_MEM, 2);
- OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE);
- OUT_RELOC(ring, query_sample_idx(aq, i, stop));
- }
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE);
+ OUT_RELOC(ring, query_sample_idx(aq, i, stop));
+ }
}
static void
perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd_batch_query_data *data = aq->query_data;
- struct fd2_query_sample *sp = buf;
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd2_query_sample *sp = buf;
- for (unsigned i = 0; i < data->num_query_entries; i++)
- result->batch[i].u64 = sp[i].stop - sp[i].start;
+ for (unsigned i = 0; i < data->num_query_entries; i++)
+ result->batch[i].u64 = sp[i].stop - sp[i].start;
}
static const struct fd_acc_sample_provider perfcntr = {
- .query_type = FD_QUERY_FIRST_PERFCNTR,
- .always = true,
- .resume = perfcntr_resume,
- .pause = perfcntr_pause,
- .result = perfcntr_accumulate_result,
+ .query_type = FD_QUERY_FIRST_PERFCNTR,
+ .always = true,
+ .resume = perfcntr_resume,
+ .pause = perfcntr_pause,
+ .result = perfcntr_accumulate_result,
};
static struct pipe_query *
-fd2_create_batch_query(struct pipe_context *pctx,
- unsigned num_queries, unsigned *query_types)
+fd2_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
+ unsigned *query_types)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_screen *screen = ctx->screen;
- struct fd_query *q;
- struct fd_acc_query *aq;
- struct fd_batch_query_data *data;
-
- data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data,
- num_queries * sizeof(data->query_entries[0]));
-
- data->screen = screen;
- data->num_query_entries = num_queries;
-
- /* validate the requested query_types and ensure we don't try
- * to request more query_types of a given group than we have
- * counters:
- */
- unsigned counters_per_group[screen->num_perfcntr_groups];
- memset(counters_per_group, 0, sizeof(counters_per_group));
-
- for (unsigned i = 0; i < num_queries; i++) {
- unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
-
- /* verify valid query_type, ie. is it actually a perfcntr? */
- if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
- (idx >= screen->num_perfcntr_queries)) {
- mesa_loge("invalid batch query query_type: %u", query_types[i]);
- goto error;
- }
-
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
-
- entry->gid = pq->group_id;
-
- /* the perfcntr_queries[] table flattens all the countables
- * for each group in series, ie:
- *
- * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
- *
- * So to find the countable index just step back through the
- * table to find the first entry with the same group-id.
- */
- while (pq > screen->perfcntr_queries) {
- pq--;
- if (pq->group_id == entry->gid)
- entry->cid++;
- }
-
- if (counters_per_group[entry->gid] >=
- screen->perfcntr_groups[entry->gid].num_counters) {
- mesa_loge("too many counters for group %u", entry->gid);
- goto error;
- }
-
- counters_per_group[entry->gid]++;
- }
-
- q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
- aq = fd_acc_query(q);
-
- /* sample buffer size is based on # of queries: */
- aq->size = num_queries * sizeof(struct fd2_query_sample);
- aq->query_data = data;
-
- return (struct pipe_query *)q;
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_screen *screen = ctx->screen;
+ struct fd_query *q;
+ struct fd_acc_query *aq;
+ struct fd_batch_query_data *data;
+
+ data = CALLOC_VARIANT_LENGTH_STRUCT(
+ fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
+
+ data->screen = screen;
+ data->num_query_entries = num_queries;
+
+ /* validate the requested query_types and ensure we don't try
+ * to request more query_types of a given group than we have
+ * counters:
+ */
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
+
+ for (unsigned i = 0; i < num_queries; i++) {
+ unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
+
+ /* verify valid query_type, ie. is it actually a perfcntr? */
+ if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
+ (idx >= screen->num_perfcntr_queries)) {
+ mesa_loge("invalid batch query query_type: %u", query_types[i]);
+ goto error;
+ }
+
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
+
+ entry->gid = pq->group_id;
+
+ /* the perfcntr_queries[] table flattens all the countables
+ * for each group in series, ie:
+ *
+ * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
+ *
+ * So to find the countable index just step back through the
+ * table to find the first entry with the same group-id.
+ */
+ while (pq > screen->perfcntr_queries) {
+ pq--;
+ if (pq->group_id == entry->gid)
+ entry->cid++;
+ }
+
+ if (counters_per_group[entry->gid] >=
+ screen->perfcntr_groups[entry->gid].num_counters) {
+ mesa_loge("too many counters for group %u", entry->gid);
+ goto error;
+ }
+
+ counters_per_group[entry->gid]++;
+ }
+
+ q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
+ aq = fd_acc_query(q);
+
+ /* sample buffer size is based on # of queries: */
+ aq->size = num_queries * sizeof(struct fd2_query_sample);
+ aq->query_data = data;
+
+ return (struct pipe_query *)q;
error:
- free(data);
- return NULL;
+ free(data);
+ return NULL;
}
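
A worked example of the back-walk that recovers the countable id: suppose perfcntr_queries[] flattens two groups as shown below; then for idx = 4:

/* [0]=(G0,C0)  [1]=(G0,C1)  [2]=(G0,C2)  [3]=(G1,C0)  [4]=(G1,C1)
 * entry->gid = 1; stepping back from [4], only [3] shares group_id 1,
 * so entry->cid = 1, i.e. countable C1 of group G1.
 */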
void
-fd2_query_context_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd2_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- ctx->create_query = fd_acc_create_query;
- ctx->query_update_batch = fd_acc_query_update_batch;
+ ctx->create_query = fd_acc_create_query;
+ ctx->query_update_batch = fd_acc_query_update_batch;
- pctx->create_batch_query = fd2_create_batch_query;
+ pctx->create_batch_query = fd2_create_batch_query;
}
* Rob Clark <robclark@freedesktop.org>
*/
-
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd2_rasterizer.h"
#include "fd2_context.h"
+#include "fd2_rasterizer.h"
#include "fd2_util.h"
-
void *
fd2_rasterizer_state_create(struct pipe_context *pctx,
- const struct pipe_rasterizer_state *cso)
+ const struct pipe_rasterizer_state *cso)
{
- struct fd2_rasterizer_stateobj *so;
- float psize_min, psize_max;
-
- so = CALLOC_STRUCT(fd2_rasterizer_stateobj);
- if (!so)
- return NULL;
-
- if (cso->point_size_per_vertex) {
- psize_min = util_get_min_point_size(cso);
- psize_max = 8192.0 - 0.0625;
- } else {
- /* Force the point size to be as if the vertex output was disabled. */
- psize_min = cso->point_size;
- psize_max = cso->point_size;
- }
-
- so->base = *cso;
-
- so->pa_sc_line_stipple = cso->line_stipple_enable ?
- A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(cso->line_stipple_pattern) |
- A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(cso->line_stipple_factor) : 0;
-
- so->pa_cl_clip_cntl = 0; // TODO
-
- so->pa_su_vtx_cntl =
- A2XX_PA_SU_VTX_CNTL_PIX_CENTER(cso->half_pixel_center ? PIXCENTER_OGL : PIXCENTER_D3D) |
- A2XX_PA_SU_VTX_CNTL_QUANT_MODE(ONE_SIXTEENTH);
-
- so->pa_su_point_size =
- A2XX_PA_SU_POINT_SIZE_HEIGHT(cso->point_size/2) |
- A2XX_PA_SU_POINT_SIZE_WIDTH(cso->point_size/2);
-
- so->pa_su_point_minmax =
- A2XX_PA_SU_POINT_MINMAX_MIN(psize_min/2) |
- A2XX_PA_SU_POINT_MINMAX_MAX(psize_max/2);
-
- so->pa_su_line_cntl =
- A2XX_PA_SU_LINE_CNTL_WIDTH(cso->line_width/2);
-
- so->pa_su_sc_mode_cntl =
- A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE |
- A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
- A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
-
- if (cso->cull_face & PIPE_FACE_FRONT)
- so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT;
- if (cso->cull_face & PIPE_FACE_BACK)
- so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK;
- if (!cso->flatshade_first)
- so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST;
- if (!cso->front_ccw)
- so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_FACE;
- if (cso->line_stipple_enable)
- so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE;
- if (cso->multisample)
- so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE;
-
- if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
- cso->fill_back != PIPE_POLYGON_MODE_FILL)
- so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DUALMODE);
- else
- so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DISABLED);
-
- if (cso->offset_tri)
- so->pa_su_sc_mode_cntl |=
- A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE |
- A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE |
- A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE;
-
- return so;
+ struct fd2_rasterizer_stateobj *so;
+ float psize_min, psize_max;
+
+ so = CALLOC_STRUCT(fd2_rasterizer_stateobj);
+ if (!so)
+ return NULL;
+
+ if (cso->point_size_per_vertex) {
+ psize_min = util_get_min_point_size(cso);
+ psize_max = 8192.0 - 0.0625;
+ } else {
+ /* Force the point size to be as if the vertex output was disabled. */
+ psize_min = cso->point_size;
+ psize_max = cso->point_size;
+ }
+
+ so->base = *cso;
+
+ so->pa_sc_line_stipple =
+ cso->line_stipple_enable
+ ? A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(cso->line_stipple_pattern) |
+ A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(cso->line_stipple_factor)
+ : 0;
+
+ so->pa_cl_clip_cntl = 0; // TODO
+
+ so->pa_su_vtx_cntl =
+ A2XX_PA_SU_VTX_CNTL_PIX_CENTER(cso->half_pixel_center ? PIXCENTER_OGL
+ : PIXCENTER_D3D) |
+ A2XX_PA_SU_VTX_CNTL_QUANT_MODE(ONE_SIXTEENTH);
+
+ so->pa_su_point_size = A2XX_PA_SU_POINT_SIZE_HEIGHT(cso->point_size / 2) |
+ A2XX_PA_SU_POINT_SIZE_WIDTH(cso->point_size / 2);
+
+ so->pa_su_point_minmax = A2XX_PA_SU_POINT_MINMAX_MIN(psize_min / 2) |
+ A2XX_PA_SU_POINT_MINMAX_MAX(psize_max / 2);
+
+ so->pa_su_line_cntl = A2XX_PA_SU_LINE_CNTL_WIDTH(cso->line_width / 2);
+
+ so->pa_su_sc_mode_cntl =
+ A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE |
+ A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
+ A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+
+ if (cso->cull_face & PIPE_FACE_FRONT)
+ so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT;
+ if (cso->cull_face & PIPE_FACE_BACK)
+ so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK;
+ if (!cso->flatshade_first)
+ so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST;
+ if (!cso->front_ccw)
+ so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_FACE;
+ if (cso->line_stipple_enable)
+ so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE;
+ if (cso->multisample)
+ so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE;
+
+ if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
+ cso->fill_back != PIPE_POLYGON_MODE_FILL)
+ so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DUALMODE);
+ else
+ so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DISABLED);
+
+ if (cso->offset_tri)
+ so->pa_su_sc_mode_cntl |=
+ A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE |
+ A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE |
+ A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE;
+
+ return so;
}
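
A note on the point-size values above (encoding details assumed, not from the source): the /2 throughout suggests the registers take half-sizes, and the psize_max ceiling sits one fixed-point step below 8192:

/* point_size = 4.0      ->  PA_SU_POINT_SIZE width/height fields = 2.0
 * psize_max  = 8192.0 - 0.0625, where 0.0625 = 1/16 is one step of a
 *              .4 fixed-point fraction (assumed register format)
 */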
#ifndef FD2_RASTERIZER_H_
#define FD2_RASTERIZER_H_
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
struct fd2_rasterizer_stateobj {
- struct pipe_rasterizer_state base;
- uint32_t pa_sc_line_stipple;
- uint32_t pa_cl_clip_cntl;
- uint32_t pa_su_vtx_cntl;
- uint32_t pa_su_point_size;
- uint32_t pa_su_point_minmax;
- uint32_t pa_su_line_cntl;
- uint32_t pa_su_sc_mode_cntl;
+ struct pipe_rasterizer_state base;
+ uint32_t pa_sc_line_stipple;
+ uint32_t pa_cl_clip_cntl;
+ uint32_t pa_su_vtx_cntl;
+ uint32_t pa_su_point_size;
+ uint32_t pa_su_point_minmax;
+ uint32_t pa_su_line_cntl;
+ uint32_t pa_su_sc_mode_cntl;
};
static inline struct fd2_rasterizer_stateobj *
fd2_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
- return (struct fd2_rasterizer_stateobj *)rast;
+ return (struct fd2_rasterizer_stateobj *)rast;
}
-void * fd2_rasterizer_state_create(struct pipe_context *pctx,
- const struct pipe_rasterizer_state *cso);
+void *fd2_rasterizer_state_create(struct pipe_context *pctx,
+ const struct pipe_rasterizer_state *cso);
#endif /* FD2_RASTERIZER_H_ */
uint32_t
fd2_setup_slices(struct fd_resource *rsc)
{
- struct pipe_resource *prsc = &rsc->b.b;
- enum pipe_format format = prsc->format;
- uint32_t height0 = util_format_get_nblocksy(format, prsc->height0);
- uint32_t level, size = 0;
+ struct pipe_resource *prsc = &rsc->b.b;
+ enum pipe_format format = prsc->format;
+ uint32_t height0 = util_format_get_nblocksy(format, prsc->height0);
+ uint32_t level, size = 0;
- /* 32 pixel alignment */
- fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
+ /* 32 pixel alignment */
+ fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
- for (level = 0; level <= prsc->last_level; level++) {
- struct fdl_slice *slice = fd_resource_slice(rsc, level);
- uint32_t pitch = fdl2_pitch(&rsc->layout, level);
- uint32_t nblocksy = align(u_minify(height0, level), 32);
+ for (level = 0; level <= prsc->last_level; level++) {
+ struct fdl_slice *slice = fd_resource_slice(rsc, level);
+ uint32_t pitch = fdl2_pitch(&rsc->layout, level);
+ uint32_t nblocksy = align(u_minify(height0, level), 32);
- /* mipmaps have power of two sizes in memory */
- if (level)
- nblocksy = util_next_power_of_two(nblocksy);
+ /* mipmaps have power of two sizes in memory */
+ if (level)
+ nblocksy = util_next_power_of_two(nblocksy);
- slice->offset = size;
- slice->size0 = align(pitch * nblocksy, 4096);
+ slice->offset = size;
+ slice->size0 = align(pitch * nblocksy, 4096);
- size += slice->size0 * u_minify(prsc->depth0, level) * prsc->array_size;
- }
+ size += slice->size0 * u_minify(prsc->depth0, level) * prsc->array_size;
+ }
- return size;
+ return size;
}
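
To make the layout rules concrete, a worked example under stated assumptions: a 64x64 PIPE_FORMAT_R8G8B8A8_UNORM texture (4 cpp) with three mip levels, array_size = 1, depth0 = 1, and fdl2_pitch() rounding the minified width up to the 32-pixel alignment configured above:

/* level 0: pitch = 64 * 4 = 256 B, nblocksy = align(64, 32) = 64
 *          size0 = align(256 * 64, 4096) = 16384, offset = 0
 * level 1: pitch = align(32, 32) * 4 = 128 B, nblocksy = pow2(32) = 32
 *          size0 = align(128 * 32, 4096) = 4096, offset = 16384
 * level 2: pitch = align(16, 32) * 4 = 128 B, nblocksy = pow2(align(16, 32)) = 32
 *          size0 = 4096, offset = 20480
 * returned size = 24576 bytes
 */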
unsigned
fd2_tile_mode(const struct pipe_resource *tmpl)
{
- /* disable tiling for cube maps, freedreno uses a 2D array for the staging texture,
- * (a2xx supports 2D arrays but it is not implemented)
- */
- if (tmpl->target == PIPE_TEXTURE_CUBE)
- return 0;
- /* we can enable tiling for any resource we can render to */
- return (tmpl->bind & PIPE_BIND_RENDER_TARGET) ? 1 : 0;
+ /* disable tiling for cube maps, freedreno uses a 2D array for the staging
+ * texture, (a2xx supports 2D arrays but it is not implemented)
+ */
+ if (tmpl->target == PIPE_TEXTURE_CUBE)
+ return 0;
+ /* we can enable tiling for any resource we can render to */
+ return (tmpl->bind & PIPE_BIND_RENDER_TARGET) ? 1 : 0;
}
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
-#include "fd2_screen.h"
#include "fd2_context.h"
#include "fd2_emit.h"
-#include "fd2_util.h"
#include "fd2_resource.h"
+#include "fd2_screen.h"
+#include "fd2_util.h"
static bool
fd2_screen_is_format_supported(struct pipe_screen *pscreen,
- enum pipe_format format,
- enum pipe_texture_target target,
- unsigned sample_count,
- unsigned storage_sample_count,
- unsigned usage)
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned storage_sample_count, unsigned usage)
{
- unsigned retval = 0;
-
- if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
- (sample_count > 1)) { /* TODO add MSAA */
- DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
- util_format_name(format), target, sample_count, usage);
- return false;
- }
-
- if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
- return false;
-
- if ((usage & PIPE_BIND_RENDER_TARGET) &&
- fd2_pipe2color(format) != (enum a2xx_colorformatx)~0) {
- retval |= PIPE_BIND_RENDER_TARGET;
- }
-
- if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_VERTEX_BUFFER)) &&
- !util_format_is_srgb(format) &&
- !util_format_is_pure_integer(format) &&
- fd2_pipe2surface(format).format != FMT_INVALID) {
- retval |= usage & PIPE_BIND_VERTEX_BUFFER;
- /* the only npot blocksize supported texture format is R32G32B32_FLOAT */
- if (util_is_power_of_two_or_zero(util_format_get_blocksize(format)) ||
- format == PIPE_FORMAT_R32G32B32_FLOAT)
- retval |= usage & PIPE_BIND_SAMPLER_VIEW;
- }
-
- if ((usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) &&
- (fd2_pipe2color(format) != (enum a2xx_colorformatx)~0)) {
- retval |= usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED);
- }
-
- if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
- (fd_pipe2depth(format) != (enum adreno_rb_depth_format)~0)) {
- retval |= PIPE_BIND_DEPTH_STENCIL;
- }
-
- if ((usage & PIPE_BIND_INDEX_BUFFER) &&
- (fd_pipe2index(format) != (enum pc_di_index_size)~0)) {
- retval |= PIPE_BIND_INDEX_BUFFER;
- }
-
- if (retval != usage) {
- DBG("not supported: format=%s, target=%d, sample_count=%d, "
- "usage=%x, retval=%x", util_format_name(format),
- target, sample_count, usage, retval);
- }
-
- return retval == usage;
+ unsigned retval = 0;
+
+ if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+ (sample_count > 1)) { /* TODO add MSAA */
+ DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+ util_format_name(format), target, sample_count, usage);
+ return false;
+ }
+
+ if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
+ return false;
+
+ if ((usage & PIPE_BIND_RENDER_TARGET) &&
+ fd2_pipe2color(format) != (enum a2xx_colorformatx) ~0) {
+ retval |= PIPE_BIND_RENDER_TARGET;
+ }
+
+ if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_VERTEX_BUFFER)) &&
+ !util_format_is_srgb(format) && !util_format_is_pure_integer(format) &&
+ fd2_pipe2surface(format).format != FMT_INVALID) {
+ retval |= usage & PIPE_BIND_VERTEX_BUFFER;
+      /* R32G32B32_FLOAT is the only supported texture format with an npot blocksize */
+ if (util_is_power_of_two_or_zero(util_format_get_blocksize(format)) ||
+ format == PIPE_FORMAT_R32G32B32_FLOAT)
+ retval |= usage & PIPE_BIND_SAMPLER_VIEW;
+ }
+
+ if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) &&
+ (fd2_pipe2color(format) != (enum a2xx_colorformatx) ~0)) {
+ retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT | PIPE_BIND_SHARED);
+ }
+
+ if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+ (fd_pipe2depth(format) != (enum adreno_rb_depth_format) ~0)) {
+ retval |= PIPE_BIND_DEPTH_STENCIL;
+ }
+
+ if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+ (fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
+ retval |= PIPE_BIND_INDEX_BUFFER;
+ }
+
+ if (retval != usage) {
+ DBG("not supported: format=%s, target=%d, sample_count=%d, "
+ "usage=%x, retval=%x",
+ util_format_name(format), target, sample_count, usage, retval);
+ }
+
+ return retval == usage;
}
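
In short, each supported bind bit is accumulated into retval and the query succeeds only when every requested bit was granted. A hypothetical illustration of that contract:

/* usage = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW with
 * PIPE_FORMAT_R8G8B8A8_UNORM: both bits land in retval -> true.
 * Adding PIPE_BIND_DEPTH_STENCIL: fd_pipe2depth() rejects a color
 * format, the bit stays clear in retval, the DBG path reports the
 * mismatch, and the function returns false.
 */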
void
fd2_screen_init(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- screen->max_rts = 1;
- pscreen->context_create = fd2_context_create;
- pscreen->is_format_supported = fd2_screen_is_format_supported;
+ screen->max_rts = 1;
+ pscreen->context_create = fd2_context_create;
+ pscreen->is_format_supported = fd2_screen_is_format_supported;
- screen->setup_slices = fd2_setup_slices;
- if (FD_DBG(TTILE))
- screen->tile_mode = fd2_tile_mode;
+ screen->setup_slices = fd2_setup_slices;
+ if (FD_DBG(TTILE))
+ screen->tile_mode = fd2_tile_mode;
- fd2_emit_init_screen(pscreen);
+ fd2_emit_init_screen(pscreen);
}
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "fd2_texture.h"
#include "fd2_util.h"
static enum sq_tex_clamp
tex_clamp(unsigned wrap)
{
- switch (wrap) {
- case PIPE_TEX_WRAP_REPEAT:
- return SQ_TEX_WRAP;
- case PIPE_TEX_WRAP_CLAMP:
- return SQ_TEX_CLAMP_HALF_BORDER;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return SQ_TEX_CLAMP_LAST_TEXEL;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- return SQ_TEX_CLAMP_BORDER;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- return SQ_TEX_MIRROR;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- return SQ_TEX_MIRROR_ONCE_HALF_BORDER;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- return SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- return SQ_TEX_MIRROR_ONCE_BORDER;
- default:
- DBG("invalid wrap: %u", wrap);
- return 0;
- }
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return SQ_TEX_WRAP;
+ case PIPE_TEX_WRAP_CLAMP:
+ return SQ_TEX_CLAMP_HALF_BORDER;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return SQ_TEX_CLAMP_LAST_TEXEL;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return SQ_TEX_CLAMP_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return SQ_TEX_MIRROR;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ return SQ_TEX_MIRROR_ONCE_HALF_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return SQ_TEX_MIRROR_ONCE_BORDER;
+ default:
+ DBG("invalid wrap: %u", wrap);
+ return 0;
+ }
}
static enum sq_tex_filter
tex_filter(unsigned filter)
{
- switch (filter) {
- case PIPE_TEX_FILTER_NEAREST:
- return SQ_TEX_FILTER_POINT;
- case PIPE_TEX_FILTER_LINEAR:
- return SQ_TEX_FILTER_BILINEAR;
- default:
- DBG("invalid filter: %u", filter);
- return 0;
- }
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ return SQ_TEX_FILTER_POINT;
+ case PIPE_TEX_FILTER_LINEAR:
+ return SQ_TEX_FILTER_BILINEAR;
+ default:
+ DBG("invalid filter: %u", filter);
+ return 0;
+ }
}
static enum sq_tex_filter
mip_filter(unsigned filter)
{
- switch (filter) {
- case PIPE_TEX_MIPFILTER_NONE:
- return SQ_TEX_FILTER_BASEMAP;
- case PIPE_TEX_MIPFILTER_NEAREST:
- return SQ_TEX_FILTER_POINT;
- case PIPE_TEX_MIPFILTER_LINEAR:
- return SQ_TEX_FILTER_BILINEAR;
- default:
- DBG("invalid filter: %u", filter);
- return 0;
- }
+ switch (filter) {
+ case PIPE_TEX_MIPFILTER_NONE:
+ return SQ_TEX_FILTER_BASEMAP;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ return SQ_TEX_FILTER_POINT;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ return SQ_TEX_FILTER_BILINEAR;
+ default:
+ DBG("invalid filter: %u", filter);
+ return 0;
+ }
}
static void *
fd2_sampler_state_create(struct pipe_context *pctx,
- const struct pipe_sampler_state *cso)
+ const struct pipe_sampler_state *cso)
{
- struct fd2_sampler_stateobj *so = CALLOC_STRUCT(fd2_sampler_stateobj);
+ struct fd2_sampler_stateobj *so = CALLOC_STRUCT(fd2_sampler_stateobj);
- if (!so)
- return NULL;
+ if (!so)
+ return NULL;
- so->base = *cso;
+ so->base = *cso;
- /* TODO
- * cso->max_anisotropy
- * cso->normalized_coords (dealt with by shader for rect textures?)
- */
+ /* TODO
+ * cso->max_anisotropy
+ * cso->normalized_coords (dealt with by shader for rect textures?)
+ */
- /* SQ_TEX0_PITCH() must be OR'd in later when we know the bound texture: */
- so->tex0 =
- A2XX_SQ_TEX_0_CLAMP_X(tex_clamp(cso->wrap_s)) |
- A2XX_SQ_TEX_0_CLAMP_Y(tex_clamp(cso->wrap_t)) |
- A2XX_SQ_TEX_0_CLAMP_Z(tex_clamp(cso->wrap_r));
+ /* SQ_TEX0_PITCH() must be OR'd in later when we know the bound texture: */
+ so->tex0 = A2XX_SQ_TEX_0_CLAMP_X(tex_clamp(cso->wrap_s)) |
+ A2XX_SQ_TEX_0_CLAMP_Y(tex_clamp(cso->wrap_t)) |
+ A2XX_SQ_TEX_0_CLAMP_Z(tex_clamp(cso->wrap_r));
- so->tex3 =
- A2XX_SQ_TEX_3_XY_MAG_FILTER(tex_filter(cso->mag_img_filter)) |
- A2XX_SQ_TEX_3_XY_MIN_FILTER(tex_filter(cso->min_img_filter)) |
- A2XX_SQ_TEX_3_MIP_FILTER(mip_filter(cso->min_mip_filter));
+ so->tex3 = A2XX_SQ_TEX_3_XY_MAG_FILTER(tex_filter(cso->mag_img_filter)) |
+ A2XX_SQ_TEX_3_XY_MIN_FILTER(tex_filter(cso->min_img_filter)) |
+ A2XX_SQ_TEX_3_MIP_FILTER(mip_filter(cso->min_mip_filter));
- so->tex4 = 0;
- if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE)
- so->tex4 = A2XX_SQ_TEX_4_LOD_BIAS(cso->lod_bias);
+ so->tex4 = 0;
+ if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE)
+ so->tex4 = A2XX_SQ_TEX_4_LOD_BIAS(cso->lod_bias);
- return so;
+ return so;
}
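
The "OR'd in later" comment reflects that sampler state and sampler view own disjoint bitfields of the same SQ_TEX_* dwords. A minimal sketch of the assumed emit-time combination (the actual emit code lives elsewhere in the driver):

static uint32_t
combine_tex0(const struct fd2_sampler_stateobj *samp,
             const struct fd2_pipe_sampler_view *view)
{
   /* clamp bits from the sampler | pitch/sign/tile bits from the view */
   return samp->tex0 | view->tex0;
}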
static void
-fd2_sampler_states_bind(struct pipe_context *pctx,
- enum pipe_shader_type shader, unsigned start,
- unsigned nr, void **hwcso)
- in_dt
+fd2_sampler_states_bind(struct pipe_context *pctx, enum pipe_shader_type shader,
+ unsigned start, unsigned nr, void **hwcso) in_dt
{
- if (!hwcso)
- nr = 0;
+ if (!hwcso)
+ nr = 0;
- if (shader == PIPE_SHADER_FRAGMENT) {
- struct fd_context *ctx = fd_context(pctx);
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ struct fd_context *ctx = fd_context(pctx);
- /* on a2xx, since there is a flat address space for textures/samplers,
- * a change in # of fragment textures/samplers will trigger patching and
- * re-emitting the vertex shader:
- */
- if (nr != ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers)
- ctx->dirty |= FD_DIRTY_TEXSTATE;
- }
+ /* on a2xx, since there is a flat address space for textures/samplers,
+ * a change in # of fragment textures/samplers will trigger patching and
+ * re-emitting the vertex shader:
+ */
+ if (nr != ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers)
+ ctx->dirty |= FD_DIRTY_TEXSTATE;
+ }
- fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
+ fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
}
static enum sq_tex_dimension
tex_dimension(unsigned target)
{
- switch (target) {
- default:
- assert(0);
- case PIPE_TEXTURE_1D:
- assert(0); /* TODO */
- return SQ_TEX_DIMENSION_1D;
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D:
- return SQ_TEX_DIMENSION_2D;
- case PIPE_TEXTURE_3D:
- assert(0); /* TODO */
- return SQ_TEX_DIMENSION_3D;
- case PIPE_TEXTURE_CUBE:
- return SQ_TEX_DIMENSION_CUBE;
- }
+ switch (target) {
+ default:
+ assert(0);
+ case PIPE_TEXTURE_1D:
+ assert(0); /* TODO */
+ return SQ_TEX_DIMENSION_1D;
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D:
+ return SQ_TEX_DIMENSION_2D;
+ case PIPE_TEXTURE_3D:
+ assert(0); /* TODO */
+ return SQ_TEX_DIMENSION_3D;
+ case PIPE_TEXTURE_CUBE:
+ return SQ_TEX_DIMENSION_CUBE;
+ }
}
static struct pipe_sampler_view *
fd2_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
- const struct pipe_sampler_view *cso)
+ const struct pipe_sampler_view *cso)
{
- struct fd2_pipe_sampler_view *so = CALLOC_STRUCT(fd2_pipe_sampler_view);
- struct fd_resource *rsc = fd_resource(prsc);
- struct surface_format fmt = fd2_pipe2surface(cso->format);
-
- if (!so)
- return NULL;
-
- so->base = *cso;
- pipe_reference(NULL, &prsc->reference);
- so->base.texture = prsc;
- so->base.reference.count = 1;
- so->base.context = pctx;
-
- so->tex0 =
- A2XX_SQ_TEX_0_SIGN_X(fmt.sign) |
- A2XX_SQ_TEX_0_SIGN_Y(fmt.sign) |
- A2XX_SQ_TEX_0_SIGN_Z(fmt.sign) |
- A2XX_SQ_TEX_0_SIGN_W(fmt.sign) |
- A2XX_SQ_TEX_0_PITCH(fdl2_pitch_pixels(&rsc->layout, 0) *
- util_format_get_blockwidth(prsc->format)) |
- COND(rsc->layout.tile_mode, A2XX_SQ_TEX_0_TILED);
- so->tex1 =
- A2XX_SQ_TEX_1_FORMAT(fmt.format) |
- A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL);
- so->tex2 =
- A2XX_SQ_TEX_2_HEIGHT(prsc->height0 - 1) |
- A2XX_SQ_TEX_2_WIDTH(prsc->width0 - 1);
- so->tex3 =
- A2XX_SQ_TEX_3_NUM_FORMAT(fmt.num_format) |
- fd2_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
- cso->swizzle_b, cso->swizzle_a) |
- A2XX_SQ_TEX_3_EXP_ADJUST(fmt.exp_adjust);
-
- so->tex4 =
- A2XX_SQ_TEX_4_MIP_MIN_LEVEL(fd_sampler_first_level(cso)) |
- A2XX_SQ_TEX_4_MIP_MAX_LEVEL(fd_sampler_last_level(cso));
-
- so->tex5 = A2XX_SQ_TEX_5_DIMENSION(tex_dimension(prsc->target));
-
- return &so->base;
+ struct fd2_pipe_sampler_view *so = CALLOC_STRUCT(fd2_pipe_sampler_view);
+ struct fd_resource *rsc = fd_resource(prsc);
+ struct surface_format fmt = fd2_pipe2surface(cso->format);
+
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+ pipe_reference(NULL, &prsc->reference);
+ so->base.texture = prsc;
+ so->base.reference.count = 1;
+ so->base.context = pctx;
+
+ so->tex0 = A2XX_SQ_TEX_0_SIGN_X(fmt.sign) | A2XX_SQ_TEX_0_SIGN_Y(fmt.sign) |
+ A2XX_SQ_TEX_0_SIGN_Z(fmt.sign) | A2XX_SQ_TEX_0_SIGN_W(fmt.sign) |
+ A2XX_SQ_TEX_0_PITCH(fdl2_pitch_pixels(&rsc->layout, 0) *
+ util_format_get_blockwidth(prsc->format)) |
+ COND(rsc->layout.tile_mode, A2XX_SQ_TEX_0_TILED);
+ so->tex1 = A2XX_SQ_TEX_1_FORMAT(fmt.format) |
+ A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL);
+ so->tex2 = A2XX_SQ_TEX_2_HEIGHT(prsc->height0 - 1) |
+ A2XX_SQ_TEX_2_WIDTH(prsc->width0 - 1);
+ so->tex3 = A2XX_SQ_TEX_3_NUM_FORMAT(fmt.num_format) |
+ fd2_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
+ cso->swizzle_b, cso->swizzle_a) |
+ A2XX_SQ_TEX_3_EXP_ADJUST(fmt.exp_adjust);
+
+ so->tex4 = A2XX_SQ_TEX_4_MIP_MIN_LEVEL(fd_sampler_first_level(cso)) |
+ A2XX_SQ_TEX_4_MIP_MAX_LEVEL(fd_sampler_last_level(cso));
+
+ so->tex5 = A2XX_SQ_TEX_5_DIMENSION(tex_dimension(prsc->target));
+
+ return &so->base;
}
static void
fd2_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
- unsigned start, unsigned nr, unsigned unbind_num_trailing_slots,
- struct pipe_sampler_view **views)
- in_dt
+ unsigned start, unsigned nr,
+ unsigned unbind_num_trailing_slots,
+ struct pipe_sampler_view **views) in_dt
{
- if (shader == PIPE_SHADER_FRAGMENT) {
- struct fd_context *ctx = fd_context(pctx);
-
- /* on a2xx, since there is a flat address space for textures/samplers,
- * a change in # of fragment textures/samplers will trigger patching and
- * re-emitting the vertex shader:
- */
- if (nr != ctx->tex[PIPE_SHADER_FRAGMENT].num_textures)
- ctx->dirty |= FD_DIRTY_TEXSTATE;
- }
-
- fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots, views);
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ struct fd_context *ctx = fd_context(pctx);
+
+ /* on a2xx, since there is a flat address space for textures/samplers,
+ * a change in # of fragment textures/samplers will trigger patching and
+ * re-emitting the vertex shader:
+ */
+ if (nr != ctx->tex[PIPE_SHADER_FRAGMENT].num_textures)
+ ctx->dirty |= FD_DIRTY_TEXSTATE;
+ }
+
+ fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots,
+ views);
}
/* map gallium sampler-id to hw const-idx.. adreno uses a flat address space
 */
unsigned
fd2_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex,
- unsigned samp_id)
- assert_dt
+ unsigned samp_id) assert_dt
{
- if (tex == &ctx->tex[PIPE_SHADER_FRAGMENT])
- return samp_id;
- return samp_id + ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers;
+ if (tex == &ctx->tex[PIPE_SHADER_FRAGMENT])
+ return samp_id;
+ return samp_id + ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers;
}
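
A worked example of the flat address space this implements: fragment samplers occupy the first const indices, and vertex samplers follow. This is also why a change in the fragment sampler count dirties FD_DIRTY_TEXSTATE above, since every vertex sampler index shifts.

/* with 3 fragment samplers bound:
 *   fd2_get_const_idx(ctx, &ctx->tex[PIPE_SHADER_FRAGMENT], 1) -> 1
 *   fd2_get_const_idx(ctx, &ctx->tex[PIPE_SHADER_VERTEX],   1) -> 4
 */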
void
fd2_texture_init(struct pipe_context *pctx)
{
- pctx->create_sampler_state = fd2_sampler_state_create;
- pctx->bind_sampler_states = fd2_sampler_states_bind;
- pctx->create_sampler_view = fd2_sampler_view_create;
- pctx->set_sampler_views = fd2_set_sampler_views;
+ pctx->create_sampler_state = fd2_sampler_state_create;
+ pctx->bind_sampler_states = fd2_sampler_states_bind;
+ pctx->create_sampler_view = fd2_sampler_view_create;
+ pctx->set_sampler_views = fd2_set_sampler_views;
}
#include "pipe/p_context.h"
-#include "freedreno_texture.h"
#include "freedreno_resource.h"
+#include "freedreno_texture.h"
#include "fd2_context.h"
#include "fd2_util.h"
struct fd2_sampler_stateobj {
- struct pipe_sampler_state base;
- uint32_t tex0, tex3, tex4;
+ struct pipe_sampler_state base;
+ uint32_t tex0, tex3, tex4;
};
static inline struct fd2_sampler_stateobj *
fd2_sampler_stateobj(struct pipe_sampler_state *samp)
{
- return (struct fd2_sampler_stateobj *)samp;
+ return (struct fd2_sampler_stateobj *)samp;
}
struct fd2_pipe_sampler_view {
- struct pipe_sampler_view base;
- uint32_t tex0, tex1, tex2, tex3, tex4, tex5;
+ struct pipe_sampler_view base;
+ uint32_t tex0, tex1, tex2, tex3, tex4, tex5;
};
static inline struct fd2_pipe_sampler_view *
fd2_pipe_sampler_view(struct pipe_sampler_view *pview)
{
- return (struct fd2_pipe_sampler_view *)pview;
+ return (struct fd2_pipe_sampler_view *)pview;
}
unsigned fd2_get_const_idx(struct fd_context *ctx,
- struct fd_texture_stateobj *tex, unsigned samp_id);
+ struct fd_texture_stateobj *tex, unsigned samp_id);
void fd2_texture_init(struct pipe_context *pctx);
static enum a2xx_sq_surfaceformat
pipe2surface(enum pipe_format format, struct surface_format *fmt)
{
- const struct util_format_description *desc = util_format_description(format);
-
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
- switch (format) {
- /* Compressed textures. */
- case PIPE_FORMAT_ETC1_RGB8:
- return FMT_ETC1_RGB;
- case PIPE_FORMAT_DXT1_RGB:
- case PIPE_FORMAT_DXT1_RGBA:
- return FMT_DXT1;
- case PIPE_FORMAT_DXT3_RGBA:
- return FMT_DXT2_3;
- case PIPE_FORMAT_DXT5_RGBA:
- return FMT_DXT4_5;
- case PIPE_FORMAT_ATC_RGB:
- return FMT_ATI_TC_555_565_RGB;
- case PIPE_FORMAT_ATC_RGBA_EXPLICIT:
- return FMT_ATI_TC_555_565_RGBA;
- case PIPE_FORMAT_ATC_RGBA_INTERPOLATED:
- return FMT_ATI_TC_555_565_RGBA_INTERP;
- /* YUV buffers. */
- case PIPE_FORMAT_UYVY:
- return FMT_Y1_Cr_Y0_Cb;
- case PIPE_FORMAT_YUYV:
- return FMT_Cr_Y1_Cb_Y0;
- default:
- return ~0;
- }
- }
-
- uint32_t channel_size = 0;
- for (unsigned i = 0; i < 4; i++)
- channel_size |= desc->channel[i].size << i*8;
-
- unsigned i = util_format_get_first_non_void_channel(format);
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED ||
- desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED)
- fmt->sign = SQ_TEX_SIGN_SIGNED;
- if (!desc->channel[i].normalized)
- fmt->num_format = SQ_TEX_NUM_FORMAT_INT;
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED)
- fmt->exp_adjust = -16;
-
- /* Note: the 3 channel 24bpp/48bpp/96bpp formats are only for vertex fetch
- * we can use the 4 channel format and ignore the 4th component just isn't used
- * XXX: is it possible for the extra loaded component to cause a MMU fault?
- */
+ const struct util_format_description *desc = util_format_description(format);
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ switch (format) {
+ /* Compressed textures. */
+ case PIPE_FORMAT_ETC1_RGB8:
+ return FMT_ETC1_RGB;
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ return FMT_DXT1;
+ case PIPE_FORMAT_DXT3_RGBA:
+ return FMT_DXT2_3;
+ case PIPE_FORMAT_DXT5_RGBA:
+ return FMT_DXT4_5;
+ case PIPE_FORMAT_ATC_RGB:
+ return FMT_ATI_TC_555_565_RGB;
+ case PIPE_FORMAT_ATC_RGBA_EXPLICIT:
+ return FMT_ATI_TC_555_565_RGBA;
+ case PIPE_FORMAT_ATC_RGBA_INTERPOLATED:
+ return FMT_ATI_TC_555_565_RGBA_INTERP;
+ /* YUV buffers. */
+ case PIPE_FORMAT_UYVY:
+ return FMT_Y1_Cr_Y0_Cb;
+ case PIPE_FORMAT_YUYV:
+ return FMT_Cr_Y1_Cb_Y0;
+ default:
+ return ~0;
+ }
+ }
+
+ uint32_t channel_size = 0;
+ for (unsigned i = 0; i < 4; i++)
+ channel_size |= desc->channel[i].size << i * 8;
+
+ unsigned i = util_format_get_first_non_void_channel(format);
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED ||
+ desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED)
+ fmt->sign = SQ_TEX_SIGN_SIGNED;
+ if (!desc->channel[i].normalized)
+ fmt->num_format = SQ_TEX_NUM_FORMAT_INT;
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED)
+ fmt->exp_adjust = -16;
+
+   /* Note: the 3-channel 24bpp/48bpp/96bpp formats are only for vertex
+    * fetch; we can use the 4-channel format and ignore the 4th component,
+    * which simply isn't used.
+    * XXX: is it possible for the extra loaded component to cause an MMU
+    * fault?
+    */
#define CASE(r, g, b, a) case (r | g << 8 | b << 16 | a << 24)
/* clang-format on */
#undef CASE
- return ~0;
+ return ~0;
}
struct surface_format
fd2_pipe2surface(enum pipe_format format)
{
- struct surface_format fmt = {
- .sign = SQ_TEX_SIGN_UNSIGNED,
- .num_format = SQ_TEX_NUM_FORMAT_FRAC,
- .exp_adjust = 0,
- };
- fmt.format = pipe2surface(format, &fmt);
- return fmt;
+ struct surface_format fmt = {
+ .sign = SQ_TEX_SIGN_UNSIGNED,
+ .num_format = SQ_TEX_NUM_FORMAT_FRAC,
+ .exp_adjust = 0,
+ };
+ fmt.format = pipe2surface(format, &fmt);
+ return fmt;
}
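
Worth spelling out why the sentinel works: pipe2surface() returns ~0 for unsupported formats, and storing that in the 7-bit format bitfield truncates it to 0x7f, which is exactly FMT_INVALID as defined below.

/* i.e. (enum a2xx_sq_surfaceformat)~0 stored in the 7-bit bitfield equals
 * FMT_INVALID, so fmt.format != FMT_INVALID is the supported-format test
 * used by fd2_screen_is_format_supported() above.
 */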
enum a2xx_colorformatx
fd2_pipe2color(enum pipe_format format)
{
- switch (format) {
- /* 8-bit buffers. */
- case PIPE_FORMAT_R8_UNORM:
- return COLORX_8;
- case PIPE_FORMAT_B2G3R3_UNORM:
- return COLORX_2_3_3; /* note: untested */
-
- /* 16-bit buffers. */
- case PIPE_FORMAT_B5G6R5_UNORM:
- return COLORX_5_6_5;
- case PIPE_FORMAT_B5G5R5A1_UNORM:
- case PIPE_FORMAT_B5G5R5X1_UNORM:
- return COLORX_1_5_5_5;
- case PIPE_FORMAT_B4G4R4A4_UNORM:
- case PIPE_FORMAT_B4G4R4X4_UNORM:
- return COLORX_4_4_4_4;
- case PIPE_FORMAT_R8G8_UNORM:
- return COLORX_8_8;
-
- /* 32-bit buffers. */
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- case PIPE_FORMAT_R8G8B8X8_UNORM:
- return COLORX_8_8_8_8;
- /* Note: snorm untested */
- case PIPE_FORMAT_R8G8B8A8_SNORM:
- case PIPE_FORMAT_R8G8B8X8_SNORM:
- return COLORX_S8_8_8_8;
-
- /* float buffers */
- case PIPE_FORMAT_R16_FLOAT:
- return COLORX_16_FLOAT;
- case PIPE_FORMAT_R16G16_FLOAT:
- return COLORX_16_16_FLOAT;
- case PIPE_FORMAT_R16G16B16A16_FLOAT:
- return COLORX_16_16_16_16_FLOAT;
- case PIPE_FORMAT_R32_FLOAT:
- return COLORX_32_FLOAT;
- case PIPE_FORMAT_R32G32_FLOAT:
- return COLORX_32_32_FLOAT;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- return COLORX_32_32_32_32_FLOAT;
-
- default:
- return ~0;
- }
+ switch (format) {
+ /* 8-bit buffers. */
+ case PIPE_FORMAT_R8_UNORM:
+ return COLORX_8;
+ case PIPE_FORMAT_B2G3R3_UNORM:
+ return COLORX_2_3_3; /* note: untested */
+
+ /* 16-bit buffers. */
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ return COLORX_5_6_5;
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
+ return COLORX_1_5_5_5;
+ case PIPE_FORMAT_B4G4R4A4_UNORM:
+ case PIPE_FORMAT_B4G4R4X4_UNORM:
+ return COLORX_4_4_4_4;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return COLORX_8_8;
+
+ /* 32-bit buffers. */
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R8G8B8X8_UNORM:
+ return COLORX_8_8_8_8;
+ /* Note: snorm untested */
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ case PIPE_FORMAT_R8G8B8X8_SNORM:
+ return COLORX_S8_8_8_8;
+
+ /* float buffers */
+ case PIPE_FORMAT_R16_FLOAT:
+ return COLORX_16_FLOAT;
+ case PIPE_FORMAT_R16G16_FLOAT:
+ return COLORX_16_16_FLOAT;
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ return COLORX_16_16_16_16_FLOAT;
+ case PIPE_FORMAT_R32_FLOAT:
+ return COLORX_32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return COLORX_32_32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return COLORX_32_32_32_32_FLOAT;
+
+ default:
+ return ~0;
+ }
}
static inline enum sq_tex_swiz
tex_swiz(unsigned swiz)
{
- switch (swiz) {
- default:
- case PIPE_SWIZZLE_X: return SQ_TEX_X;
- case PIPE_SWIZZLE_Y: return SQ_TEX_Y;
- case PIPE_SWIZZLE_Z: return SQ_TEX_Z;
- case PIPE_SWIZZLE_W: return SQ_TEX_W;
- case PIPE_SWIZZLE_0: return SQ_TEX_ZERO;
- case PIPE_SWIZZLE_1: return SQ_TEX_ONE;
- }
+ switch (swiz) {
+ default:
+ case PIPE_SWIZZLE_X:
+ return SQ_TEX_X;
+ case PIPE_SWIZZLE_Y:
+ return SQ_TEX_Y;
+ case PIPE_SWIZZLE_Z:
+ return SQ_TEX_Z;
+ case PIPE_SWIZZLE_W:
+ return SQ_TEX_W;
+ case PIPE_SWIZZLE_0:
+ return SQ_TEX_ZERO;
+ case PIPE_SWIZZLE_1:
+ return SQ_TEX_ONE;
+ }
}
uint32_t
fd2_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
- unsigned swizzle_b, unsigned swizzle_a)
+ unsigned swizzle_b, unsigned swizzle_a)
{
- const struct util_format_description *desc =
- util_format_description(format);
- unsigned char swiz[4] = {
- swizzle_r, swizzle_g, swizzle_b, swizzle_a,
- }, rswiz[4];
-
- util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
-
- return A2XX_SQ_TEX_3_SWIZ_X(tex_swiz(rswiz[0])) |
- A2XX_SQ_TEX_3_SWIZ_Y(tex_swiz(rswiz[1])) |
- A2XX_SQ_TEX_3_SWIZ_Z(tex_swiz(rswiz[2])) |
- A2XX_SQ_TEX_3_SWIZ_W(tex_swiz(rswiz[3]));
+ const struct util_format_description *desc = util_format_description(format);
+ unsigned char swiz[4] =
+ {
+ swizzle_r,
+ swizzle_g,
+ swizzle_b,
+ swizzle_a,
+ },
+ rswiz[4];
+
+ util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
+
+ return A2XX_SQ_TEX_3_SWIZ_X(tex_swiz(rswiz[0])) |
+ A2XX_SQ_TEX_3_SWIZ_Y(tex_swiz(rswiz[1])) |
+ A2XX_SQ_TEX_3_SWIZ_Z(tex_swiz(rswiz[2])) |
+ A2XX_SQ_TEX_3_SWIZ_W(tex_swiz(rswiz[3]));
}
uint32_t
fd2_vtx_swiz(enum pipe_format format, unsigned swizzle)
{
- const struct util_format_description *desc =
- util_format_description(format);
- unsigned char swiz[4], rswiz[4];
+ const struct util_format_description *desc = util_format_description(format);
+ unsigned char swiz[4], rswiz[4];
- for (unsigned i = 0; i < 4; i++)
- swiz[i] = (swizzle >> i * 3) & 7;
+ for (unsigned i = 0; i < 4; i++)
+ swiz[i] = (swizzle >> i * 3) & 7;
- util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
+ util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
- return rswiz[0] | rswiz[1] << 3 | rswiz[2] << 6 | rswiz[3] << 9;
+ return rswiz[0] | rswiz[1] << 3 | rswiz[2] << 6 | rswiz[3] << 9;
}
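
A quick arithmetic check of the packing above; the rswiz values are PIPE_SWIZZLE_* selectors, 3 bits each, which leaves room for the constant selectors PIPE_SWIZZLE_0 = 4 and PIPE_SWIZZLE_1 = 5:

/* identity swizzle rswiz = {0, 1, 2, 3}:
 *   0 | 1 << 3 | 2 << 6 | 3 << 9 = 0x688
 */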
struct surface_format {
/* If enum is a signed type, 0x7f is out of range. Cast it to avoid warnings. */
-#define FMT_INVALID ((enum a2xx_sq_surfaceformat) 0x7f)
- enum a2xx_sq_surfaceformat format : 7;
- enum sq_tex_sign sign : 2;
- enum sq_tex_num_format num_format : 1;
- int exp_adjust : 6;
+#define FMT_INVALID ((enum a2xx_sq_surfaceformat)0x7f)
+ enum a2xx_sq_surfaceformat format : 7;
+ enum sq_tex_sign sign : 2;
+ enum sq_tex_num_format num_format : 1;
+ int exp_adjust : 6;
};
struct surface_format fd2_pipe2surface(enum pipe_format format);
enum a2xx_colorformatx fd2_pipe2color(enum pipe_format format);
uint32_t fd2_tex_swiz(enum pipe_format format, unsigned swizzle_r,
- unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+ unsigned swizzle_g, unsigned swizzle_b,
+ unsigned swizzle_a);
uint32_t fd2_vtx_swiz(enum pipe_format format, unsigned swizzle);
/* convert x,y to dword */
-static inline uint32_t xy2d(uint16_t x, uint16_t y)
+static inline uint32_t
+xy2d(uint16_t x, uint16_t y)
{
- return ((y & 0x3fff) << 16) | (x & 0x3fff);
+ return ((y & 0x3fff) << 16) | (x & 0x3fff);
}
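
A one-line sanity check of the packing:

/* xy2d(100, 200) == (200 << 16) | 100 == 0x00c80064 */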
#endif /* FD2_UTIL_H_ */
-
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd2_zsa.h"
#include "fd2_context.h"
#include "fd2_util.h"
+#include "fd2_zsa.h"
void *
fd2_zsa_state_create(struct pipe_context *pctx,
- const struct pipe_depth_stencil_alpha_state *cso)
+ const struct pipe_depth_stencil_alpha_state *cso)
{
- struct fd2_zsa_stateobj *so;
+ struct fd2_zsa_stateobj *so;
- so = CALLOC_STRUCT(fd2_zsa_stateobj);
- if (!so)
- return NULL;
+ so = CALLOC_STRUCT(fd2_zsa_stateobj);
+ if (!so)
+ return NULL;
- so->base = *cso;
+ so->base = *cso;
- so->rb_depthcontrol |=
- A2XX_RB_DEPTHCONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
+ so->rb_depthcontrol |=
+ A2XX_RB_DEPTHCONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
- if (cso->depth_enabled)
- so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_ENABLE |
- COND(!cso->alpha_enabled, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
- if (cso->depth_writemask)
- so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE;
+ if (cso->depth_enabled)
+ so->rb_depthcontrol |=
+ A2XX_RB_DEPTHCONTROL_Z_ENABLE |
+ COND(!cso->alpha_enabled, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
+ if (cso->depth_writemask)
+ so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE;
- if (cso->stencil[0].enabled) {
- const struct pipe_stencil_state *s = &cso->stencil[0];
+ if (cso->stencil[0].enabled) {
+ const struct pipe_stencil_state *s = &cso->stencil[0];
- so->rb_depthcontrol |=
- A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
- A2XX_RB_DEPTHCONTROL_STENCILFUNC(s->func) | /* maps 1:1 */
- A2XX_RB_DEPTHCONTROL_STENCILFAIL(fd_stencil_op(s->fail_op)) |
- A2XX_RB_DEPTHCONTROL_STENCILZPASS(fd_stencil_op(s->zpass_op)) |
- A2XX_RB_DEPTHCONTROL_STENCILZFAIL(fd_stencil_op(s->zfail_op));
- so->rb_stencilrefmask |=
- 0xff000000 | /* ??? */
- A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
- A2XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
+ so->rb_depthcontrol |=
+ A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
+ A2XX_RB_DEPTHCONTROL_STENCILFUNC(s->func) | /* maps 1:1 */
+ A2XX_RB_DEPTHCONTROL_STENCILFAIL(fd_stencil_op(s->fail_op)) |
+ A2XX_RB_DEPTHCONTROL_STENCILZPASS(fd_stencil_op(s->zpass_op)) |
+ A2XX_RB_DEPTHCONTROL_STENCILZFAIL(fd_stencil_op(s->zfail_op));
+ so->rb_stencilrefmask |=
+ 0xff000000 | /* ??? */
+ A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
+ A2XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
- if (cso->stencil[1].enabled) {
- const struct pipe_stencil_state *bs = &cso->stencil[1];
+ if (cso->stencil[1].enabled) {
+ const struct pipe_stencil_state *bs = &cso->stencil[1];
- so->rb_depthcontrol |=
- A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE |
- A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(bs->func) | /* maps 1:1 */
- A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(fd_stencil_op(bs->fail_op)) |
- A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(fd_stencil_op(bs->zpass_op)) |
- A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(fd_stencil_op(bs->zfail_op));
- so->rb_stencilrefmask_bf |=
- 0xff000000 | /* ??? */
- A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
- A2XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
- }
- }
+ so->rb_depthcontrol |=
+ A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE |
+ A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(bs->func) | /* maps 1:1 */
+ A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(fd_stencil_op(bs->fail_op)) |
+ A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(fd_stencil_op(bs->zpass_op)) |
+ A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(fd_stencil_op(bs->zfail_op));
+ so->rb_stencilrefmask_bf |=
+ 0xff000000 | /* ??? */
+ A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
+ A2XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
+ }
+ }
- if (cso->alpha_enabled) {
- so->rb_colorcontrol =
- A2XX_RB_COLORCONTROL_ALPHA_FUNC(cso->alpha_func) |
- A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE;
- so->rb_alpha_ref = fui(cso->alpha_ref_value);
- }
+ if (cso->alpha_enabled) {
+ so->rb_colorcontrol = A2XX_RB_COLORCONTROL_ALPHA_FUNC(cso->alpha_func) |
+ A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE;
+ so->rb_alpha_ref = fui(cso->alpha_ref_value);
+ }
- return so;
+ return so;
}
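
One reasoning step worth recording for the EARLY_Z_ENABLE condition above:

/* early Z is gated on !alpha_enabled because the alpha test can still
 * discard a fragment after an early depth write would already have landed.
 */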
#ifndef FD2_ZSA_H_
#define FD2_ZSA_H_
-
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd2_zsa_stateobj {
- struct pipe_depth_stencil_alpha_state base;
- uint32_t rb_depthcontrol;
- uint32_t rb_colorcontrol; /* must be OR'd w/ blend->rb_colorcontrol */
- uint32_t rb_alpha_ref;
- uint32_t rb_stencilrefmask;
- uint32_t rb_stencilrefmask_bf;
+ struct pipe_depth_stencil_alpha_state base;
+ uint32_t rb_depthcontrol;
+ uint32_t rb_colorcontrol; /* must be OR'd w/ blend->rb_colorcontrol */
+ uint32_t rb_alpha_ref;
+ uint32_t rb_stencilrefmask;
+ uint32_t rb_stencilrefmask_bf;
};
static inline struct fd2_zsa_stateobj *
fd2_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
- return (struct fd2_zsa_stateobj *)zsa;
+ return (struct fd2_zsa_stateobj *)zsa;
}
-void * fd2_zsa_state_create(struct pipe_context *pctx,
- const struct pipe_depth_stencil_alpha_state *cso);
+void *fd2_zsa_state_create(struct pipe_context *pctx,
+ const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD2_ZSA_H_ */
#include "ir2_private.h"
-static bool scalar_possible(struct ir2_instr *instr)
+static bool
+scalar_possible(struct ir2_instr *instr)
{
- if (instr->alu.scalar_opc == SCALAR_NONE)
- return false;
+ if (instr->alu.scalar_opc == SCALAR_NONE)
+ return false;
- return src_ncomp(instr) == 1;
+ return src_ncomp(instr) == 1;
}
-static bool is_alu_compatible(struct ir2_instr *a, struct ir2_instr *b)
+static bool
+is_alu_compatible(struct ir2_instr *a, struct ir2_instr *b)
{
- if (!a)
- return true;
+ if (!a)
+ return true;
- /* dont use same instruction twice */
- if (a == b)
- return false;
+ /* dont use same instruction twice */
+ if (a == b)
+ return false;
- /* PRED_SET must be alone */
- if (b->alu.scalar_opc >= PRED_SETEs &&
- b->alu.scalar_opc <= PRED_SET_RESTOREs)
- return false;
+ /* PRED_SET must be alone */
+ if (b->alu.scalar_opc >= PRED_SETEs &&
+ b->alu.scalar_opc <= PRED_SET_RESTOREs)
+ return false;
- /* must write to same export (issues otherwise?) */
- return a->alu.export == b->alu.export;
+ /* must write to same export (issues otherwise?) */
+ return a->alu.export == b->alu.export;
}
/* priority of vector instruction for scheduling (lower=higher prio) */
-static unsigned alu_vector_prio(struct ir2_instr *instr)
+static unsigned
+alu_vector_prio(struct ir2_instr *instr)
{
- if (instr->alu.vector_opc == VECTOR_NONE)
- return ~0u;
+ if (instr->alu.vector_opc == VECTOR_NONE)
+ return ~0u;
- if (is_export(instr))
- return 4;
+ if (is_export(instr))
+ return 4;
- /* TODO check src type and ncomps */
- if (instr->src_count == 3)
- return 0;
+ /* TODO check src type and ncomps */
+ if (instr->src_count == 3)
+ return 0;
- if (!scalar_possible(instr))
- return 1;
+ if (!scalar_possible(instr))
+ return 1;
- return instr->src_count == 2 ? 2 : 3;
+ return instr->src_count == 2 ? 2 : 3;
}
/* priority of scalar instruction for scheduling (lower=higher prio) */
-static unsigned alu_scalar_prio(struct ir2_instr *instr)
+static unsigned
+alu_scalar_prio(struct ir2_instr *instr)
{
- if (!scalar_possible(instr))
- return ~0u;
+ if (!scalar_possible(instr))
+ return ~0u;
- /* this case is dealt with later */
- if (instr->src_count > 1)
- return ~0u;
+ /* this case is dealt with later */
+ if (instr->src_count > 1)
+ return ~0u;
- if (is_export(instr))
- return 4;
+ if (is_export(instr))
+ return 4;
- /* PRED to end of block */
- if (instr->alu.scalar_opc >= PRED_SETEs &&
- instr->alu.scalar_opc <= PRED_SET_RESTOREs)
- return 5;
+ /* PRED to end of block */
+ if (instr->alu.scalar_opc >= PRED_SETEs &&
+ instr->alu.scalar_opc <= PRED_SET_RESTOREs)
+ return 5;
- /* scalar only have highest priority */
- return instr->alu.vector_opc == VECTOR_NONE ? 0 : 3;
+   /* scalar-only instructions have the highest priority */
+ return instr->alu.vector_opc == VECTOR_NONE ? 0 : 3;
}
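
For readability, a summary of the two priority tables above (lower value = scheduled earlier); this only restates the code, it is not new policy:

/* vector slot:                      scalar slot:
 *   0  three-src ALU                  0  scalar-only op
 *   1  op with no scalar form         3  op that could also go vector
 *   2  two-src op                     4  export
 *   3  one-src op                     5  PRED_SET* (end of block)
 *   4  export
 */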
/* this is a bit messy:
 * we want to find a slot where we can co-issue a scalar MOV alongside
 * a vector instruction that was already scheduled
 */
-static struct ir2_sched_instr*
+static struct ir2_sched_instr *
insert(struct ir2_context *ctx, unsigned block_idx, unsigned reg_idx,
- struct ir2_src src1, unsigned *comp)
+ struct ir2_src src1, unsigned *comp)
{
- struct ir2_sched_instr *sched = NULL, *s;
- unsigned i, mask = 0xf;
+ struct ir2_sched_instr *sched = NULL, *s;
+ unsigned i, mask = 0xf;
- /* go first earliest point where the mov can be inserted */
- for (i = ctx->instr_sched_count-1; i > 0; i--) {
- s = &ctx->instr_sched[i - 1];
+   /* walk back to the earliest point where the mov can be inserted */
+ for (i = ctx->instr_sched_count - 1; i > 0; i--) {
+ s = &ctx->instr_sched[i - 1];
- if (s->instr && s->instr->block_idx != block_idx)
- break;
- if (s->instr_s && s->instr_s->block_idx != block_idx)
- break;
+ if (s->instr && s->instr->block_idx != block_idx)
+ break;
+ if (s->instr_s && s->instr_s->block_idx != block_idx)
+ break;
- if (src1.type == IR2_SRC_SSA) {
- if ((s->instr && s->instr->idx == src1.num) ||
- (s->instr_s && s->instr_s->idx == src1.num))
- break;
- }
+ if (src1.type == IR2_SRC_SSA) {
+ if ((s->instr && s->instr->idx == src1.num) ||
+ (s->instr_s && s->instr_s->idx == src1.num))
+ break;
+ }
- unsigned mr = ~(s->reg_state[reg_idx/8] >> reg_idx%8*4 & 0xf);
- if ((mask & mr) == 0)
- break;
+ unsigned mr = ~(s->reg_state[reg_idx / 8] >> reg_idx % 8 * 4 & 0xf);
+ if ((mask & mr) == 0)
+ break;
- mask &= mr;
- if (s->instr_s || s->instr->src_count == 3)
- continue;
+ mask &= mr;
+ if (s->instr_s || s->instr->src_count == 3)
+ continue;
- if (s->instr->type != IR2_ALU || s->instr->alu.export >= 0)
- continue;
+ if (s->instr->type != IR2_ALU || s->instr->alu.export >= 0)
+ continue;
- sched = s;
- }
- *comp = ffs(mask) - 1;
+ sched = s;
+ }
+ *comp = ffs(mask) - 1;
- if (sched) {
- for (s = sched; s != &ctx->instr_sched[ctx->instr_sched_count]; s++)
- s->reg_state[reg_idx/8] |= 1 << (*comp+reg_idx%8*4);
- }
+ if (sched) {
+ for (s = sched; s != &ctx->instr_sched[ctx->instr_sched_count]; s++)
+ s->reg_state[reg_idx / 8] |= 1 << (*comp + reg_idx % 8 * 4);
+ }
- return sched;
+ return sched;
}
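
The reg_state indexing above is compact enough to deserve a note: liveness is tracked as one 4-bit component mask per register, eight registers per uint32_t.

/* nibble for register r in a scheduled slot s:
 *   (s->reg_state[r / 8] >> (r % 8) * 4) & 0xf
 * a bit still clear in every slot being crossed marks a component that is
 * free for the inserted MOV.
 */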
/* case1:
static bool
scalarize_case1(struct ir2_context *ctx, struct ir2_instr *instr, bool order)
{
- struct ir2_src src0 = instr->src[ order];
- struct ir2_src src1 = instr->src[!order];
- struct ir2_sched_instr *sched;
- struct ir2_instr *ins;
- struct ir2_reg *reg;
- unsigned idx, comp;
-
- switch (src0.type) {
- case IR2_SRC_CONST:
- case IR2_SRC_INPUT:
- return false;
- default:
- break;
- }
-
- /* TODO, insert needs logic for this */
- if (src1.type == IR2_SRC_REG)
- return false;
-
- /* we could do something if they match src1.. */
- if (src0.negate || src0.abs)
- return false;
-
- reg = get_reg_src(ctx, &src0);
-
- /* result not used more since we will overwrite */
- for (int i = 0; i < 4; i++)
- if (reg->comp[i].ref_count != !!(instr->alu.write_mask & 1 << i))
- return false;
-
- /* find a place to insert the mov */
- sched = insert(ctx, instr->block_idx, reg->idx, src1, &comp);
- if (!sched)
- return false;
-
- ins = &ctx->instr[idx = ctx->instr_count++];
- ins->idx = idx;
- ins->type = IR2_ALU;
- ins->src[0] = src1;
- ins->src_count = 1;
- ins->is_ssa = true;
- ins->ssa.idx = reg->idx;
- ins->ssa.ncomp = 1;
- ins->ssa.comp[0].c = comp;
- ins->alu.scalar_opc = MAXs;
- ins->alu.export = -1;
- ins->alu.write_mask = 1;
- ins->pred = instr->pred;
- ins->block_idx = instr->block_idx;
-
- instr->src[0] = src0;
- instr->alu.src1_swizzle = comp;
-
- sched->instr_s = ins;
- return true;
+ struct ir2_src src0 = instr->src[order];
+ struct ir2_src src1 = instr->src[!order];
+ struct ir2_sched_instr *sched;
+ struct ir2_instr *ins;
+ struct ir2_reg *reg;
+ unsigned idx, comp;
+
+ switch (src0.type) {
+ case IR2_SRC_CONST:
+ case IR2_SRC_INPUT:
+ return false;
+ default:
+ break;
+ }
+
+ /* TODO, insert needs logic for this */
+ if (src1.type == IR2_SRC_REG)
+ return false;
+
+ /* we could do something if they match src1.. */
+ if (src0.negate || src0.abs)
+ return false;
+
+ reg = get_reg_src(ctx, &src0);
+
+ /* result not used more since we will overwrite */
+ for (int i = 0; i < 4; i++)
+ if (reg->comp[i].ref_count != !!(instr->alu.write_mask & 1 << i))
+ return false;
+
+ /* find a place to insert the mov */
+ sched = insert(ctx, instr->block_idx, reg->idx, src1, &comp);
+ if (!sched)
+ return false;
+
+ ins = &ctx->instr[idx = ctx->instr_count++];
+ ins->idx = idx;
+ ins->type = IR2_ALU;
+ ins->src[0] = src1;
+ ins->src_count = 1;
+ ins->is_ssa = true;
+ ins->ssa.idx = reg->idx;
+ ins->ssa.ncomp = 1;
+ ins->ssa.comp[0].c = comp;
+ ins->alu.scalar_opc = MAXs;
+ ins->alu.export = -1;
+ ins->alu.write_mask = 1;
+ ins->pred = instr->pred;
+ ins->block_idx = instr->block_idx;
+
+ instr->src[0] = src0;
+ instr->alu.src1_swizzle = comp;
+
+ sched->instr_s = ins;
+ return true;
}
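
What this transform buys, stated plainly (my reading of the code above, so treat it as an informed assumption): a scalar ALU slot can read only a single register, so a two-src op can co-issue as scalar only if both operands live in the same register. The inserted MAXs acts as a MOV that copies src1 into a free component of src0's register at an earlier slot, and the instruction then fetches it there via src1_swizzle. Register numbers below are hypothetical:

/* before:  dst = OP r5.x, r7.y     ; two registers, no scalar co-issue
 * after:   r5.w = MAXs r7.y        ; placed into an earlier, free slot
 *          dst  = OP r5.x, r5.w    ; one register, scalar co-issue ok
 */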
/* fill sched with next fetch or (vector and/or scalar) alu instruction */
-static int sched_next(struct ir2_context *ctx, struct ir2_sched_instr *sched)
+static int
+sched_next(struct ir2_context *ctx, struct ir2_sched_instr *sched)
{
- struct ir2_instr *avail[0x100], *instr_v = NULL, *instr_s = NULL;
- unsigned avail_count = 0;
-
- instr_alloc_type_t export = ~0u;
- int block_idx = -1;
-
- /* XXX merge this loop with the other one somehow? */
- ir2_foreach_instr(instr, ctx) {
- if (!instr->need_emit)
- continue;
- if (is_export(instr))
- export = MIN2(export, export_buf(instr->alu.export));
- }
-
- ir2_foreach_instr(instr, ctx) {
- if (!instr->need_emit)
- continue;
-
- /* dont mix exports */
- if (is_export(instr) && export_buf(instr->alu.export) != export)
- continue;
-
- if (block_idx < 0)
- block_idx = instr->block_idx;
- else if (block_idx != instr->block_idx || /* must be same block */
- instr->type == IR2_CF || /* CF/MEM must be alone */
- (is_export(instr) && export == SQ_MEMORY))
- break;
- /* it works because IR2_CF is always at end of block
- * and somewhat same idea with MEM exports, which might not be alone
- * but will end up in-order at least
- */
-
- /* check if dependencies are satisfied */
- bool is_ok = true;
- ir2_foreach_src(src, instr) {
- if (src->type == IR2_SRC_REG) {
- /* need to check if all previous instructions in the block
- * which write the reg have been emitted
- * slow..
- * XXX: check components instead of whole register
- */
- struct ir2_reg *reg = get_reg_src(ctx, src);
- ir2_foreach_instr(p, ctx) {
- if (!p->is_ssa && p->reg == reg && p->idx < instr->idx)
- is_ok &= !p->need_emit;
- }
- } else if (src->type == IR2_SRC_SSA) {
- /* in this case its easy, just check need_emit */
- is_ok &= !ctx->instr[src->num].need_emit;
- }
- }
- /* don't reorder non-ssa write before read */
- if (!instr->is_ssa) {
- ir2_foreach_instr(p, ctx) {
- if (!p->need_emit || p->idx >= instr->idx)
- continue;
-
- ir2_foreach_src(src, p) {
- if (get_reg_src(ctx, src) == instr->reg)
- is_ok = false;
- }
- }
- }
- /* don't reorder across predicates */
- if (avail_count && instr->pred != avail[0]->pred)
- is_ok = false;
-
- if (!is_ok)
- continue;
-
- avail[avail_count++] = instr;
- }
-
- if (!avail_count) {
- assert(block_idx == -1);
- return -1;
- }
-
- /* priority to FETCH instructions */
- ir2_foreach_avail(instr) {
- if (instr->type == IR2_ALU)
- continue;
-
- ra_src_free(ctx, instr);
- ra_reg(ctx, get_reg(instr), -1, false, 0);
-
- instr->need_emit = false;
- sched->instr = instr;
- sched->instr_s = NULL;
- return block_idx;
- }
-
- /* TODO precompute priorities */
-
- unsigned prio_v = ~0u, prio_s = ~0u, prio;
- ir2_foreach_avail(instr) {
- prio = alu_vector_prio(instr);
- if (prio < prio_v) {
- instr_v = instr;
- prio_v = prio;
- }
- }
-
- /* TODO can still insert scalar if src_count=3, if smart about it */
- if (!instr_v || instr_v->src_count < 3) {
- ir2_foreach_avail(instr) {
- bool compat = is_alu_compatible(instr_v, instr);
-
- prio = alu_scalar_prio(instr);
- if (prio >= prio_v && !compat)
- continue;
-
- if (prio < prio_s) {
- instr_s = instr;
- prio_s = prio;
- if (!compat)
- instr_v = NULL;
- }
- }
- }
-
- assert(instr_v || instr_s);
-
- /* now, we try more complex insertion of vector instruction as scalar
- * TODO: if we are smart we can still insert if instr_v->src_count==3
- */
- if (!instr_s && instr_v->src_count < 3) {
- ir2_foreach_avail(instr) {
- if (!is_alu_compatible(instr_v, instr) || !scalar_possible(instr))
- continue;
-
- /* at this point, src_count should always be 2 */
- assert(instr->src_count == 2);
-
- if (scalarize_case1(ctx, instr, 0)) {
- instr_s = instr;
- break;
- }
- if (scalarize_case1(ctx, instr, 1)) {
- instr_s = instr;
- break;
- }
- }
- }
-
- /* free src registers */
- if (instr_v) {
- instr_v->need_emit = false;
- ra_src_free(ctx, instr_v);
- }
-
- if (instr_s) {
- instr_s->need_emit = false;
- ra_src_free(ctx, instr_s);
- }
-
- /* allocate dst registers */
- if (instr_v)
- ra_reg(ctx, get_reg(instr_v), -1, is_export(instr_v), instr_v->alu.write_mask);
-
- if (instr_s)
- ra_reg(ctx, get_reg(instr_s), -1, is_export(instr_s), instr_s->alu.write_mask);
-
- sched->instr = instr_v;
- sched->instr_s = instr_s;
- return block_idx;
+ struct ir2_instr *avail[0x100], *instr_v = NULL, *instr_s = NULL;
+ unsigned avail_count = 0;
+
+ instr_alloc_type_t export = ~0u;
+ int block_idx = -1;
+
+ /* XXX merge this loop with the other one somehow? */
+ ir2_foreach_instr(instr, ctx)
+ {
+ if (!instr->need_emit)
+ continue;
+ if (is_export(instr))
+ export = MIN2(export, export_buf(instr->alu.export));
+ }
+
+ ir2_foreach_instr(instr, ctx)
+ {
+ if (!instr->need_emit)
+ continue;
+
+      /* don't mix exports */
+ if (is_export(instr) && export_buf(instr->alu.export) != export)
+ continue;
+
+ if (block_idx < 0)
+ block_idx = instr->block_idx;
+ else if (block_idx != instr->block_idx || /* must be same block */
+ instr->type == IR2_CF || /* CF/MEM must be alone */
+ (is_export(instr) && export == SQ_MEMORY))
+ break;
+      /* this works because IR2_CF is always at the end of a block; much
+       * the same idea applies to MEM exports, which might not be alone
+       * but will at least end up in order
+       */
+
+ /* check if dependencies are satisfied */
+ bool is_ok = true;
+ ir2_foreach_src(src, instr)
+ {
+ if (src->type == IR2_SRC_REG) {
+ /* need to check if all previous instructions in the block
+ * which write the reg have been emitted
+ * slow..
+ * XXX: check components instead of whole register
+ */
+ struct ir2_reg *reg = get_reg_src(ctx, src);
+ ir2_foreach_instr(p, ctx)
+ {
+ if (!p->is_ssa && p->reg == reg && p->idx < instr->idx)
+ is_ok &= !p->need_emit;
+ }
+ } else if (src->type == IR2_SRC_SSA) {
+         /* in this case it's easy: just check need_emit */
+ is_ok &= !ctx->instr[src->num].need_emit;
+ }
+ }
+ /* don't reorder non-ssa write before read */
+ if (!instr->is_ssa) {
+ ir2_foreach_instr(p, ctx)
+ {
+ if (!p->need_emit || p->idx >= instr->idx)
+ continue;
+
+ ir2_foreach_src(src, p)
+ {
+ if (get_reg_src(ctx, src) == instr->reg)
+ is_ok = false;
+ }
+ }
+ }
+ /* don't reorder across predicates */
+ if (avail_count && instr->pred != avail[0]->pred)
+ is_ok = false;
+
+ if (!is_ok)
+ continue;
+
+ avail[avail_count++] = instr;
+ }
+
+ if (!avail_count) {
+ assert(block_idx == -1);
+ return -1;
+ }
+
+ /* priority to FETCH instructions */
+ ir2_foreach_avail(instr)
+ {
+ if (instr->type == IR2_ALU)
+ continue;
+
+ ra_src_free(ctx, instr);
+ ra_reg(ctx, get_reg(instr), -1, false, 0);
+
+ instr->need_emit = false;
+ sched->instr = instr;
+ sched->instr_s = NULL;
+ return block_idx;
+ }
+
+ /* TODO precompute priorities */
+
+ unsigned prio_v = ~0u, prio_s = ~0u, prio;
+ ir2_foreach_avail(instr)
+ {
+ prio = alu_vector_prio(instr);
+ if (prio < prio_v) {
+ instr_v = instr;
+ prio_v = prio;
+ }
+ }
+
+ /* TODO can still insert scalar if src_count=3, if smart about it */
+ if (!instr_v || instr_v->src_count < 3) {
+ ir2_foreach_avail(instr)
+ {
+ bool compat = is_alu_compatible(instr_v, instr);
+
+ prio = alu_scalar_prio(instr);
+ if (prio >= prio_v && !compat)
+ continue;
+
+ if (prio < prio_s) {
+ instr_s = instr;
+ prio_s = prio;
+ if (!compat)
+ instr_v = NULL;
+ }
+ }
+ }
+
+ assert(instr_v || instr_s);
+
+   /* now we try the more complex insertion of a vector instruction as scalar
+    * TODO: if we are smart we can still insert if instr_v->src_count==3
+    */
+ if (!instr_s && instr_v->src_count < 3) {
+ ir2_foreach_avail(instr)
+ {
+ if (!is_alu_compatible(instr_v, instr) || !scalar_possible(instr))
+ continue;
+
+ /* at this point, src_count should always be 2 */
+ assert(instr->src_count == 2);
+
+ if (scalarize_case1(ctx, instr, 0)) {
+ instr_s = instr;
+ break;
+ }
+ if (scalarize_case1(ctx, instr, 1)) {
+ instr_s = instr;
+ break;
+ }
+ }
+ }
+
+ /* free src registers */
+ if (instr_v) {
+ instr_v->need_emit = false;
+ ra_src_free(ctx, instr_v);
+ }
+
+ if (instr_s) {
+ instr_s->need_emit = false;
+ ra_src_free(ctx, instr_s);
+ }
+
+ /* allocate dst registers */
+ if (instr_v)
+ ra_reg(ctx, get_reg(instr_v), -1, is_export(instr_v),
+ instr_v->alu.write_mask);
+
+ if (instr_s)
+ ra_reg(ctx, get_reg(instr_s), -1, is_export(instr_s),
+ instr_s->alu.write_mask);
+
+ sched->instr = instr_v;
+ sched->instr_s = instr_s;
+ return block_idx;
}
/* scheduling: determine order of instructions */
-static void schedule_instrs(struct ir2_context *ctx)
+static void
+schedule_instrs(struct ir2_context *ctx)
{
- struct ir2_sched_instr *sched;
- int block_idx;
-
- /* allocate input registers */
- for (unsigned idx = 0; idx < ARRAY_SIZE(ctx->input); idx++)
- if (ctx->input[idx].initialized)
- ra_reg(ctx, &ctx->input[idx], idx, false, 0);
-
- for (;;) {
- sched = &ctx->instr_sched[ctx->instr_sched_count++];
- block_idx = sched_next(ctx, sched);
- if (block_idx < 0)
- break;
- memcpy(sched->reg_state, ctx->reg_state, sizeof(ctx->reg_state));
-
- /* catch texture fetch after scheduling and insert the
- * SET_TEX_LOD right before it if necessary
- * TODO clean this up
- */
- struct ir2_instr *instr = sched->instr, *tex_lod;
- if (instr && instr->type == IR2_FETCH &&
- instr->fetch.opc == TEX_FETCH && instr->src_count == 2) {
- /* generate the SET_LOD instruction */
- tex_lod = &ctx->instr[ctx->instr_count++];
- tex_lod->type = IR2_FETCH;
- tex_lod->block_idx = instr->block_idx;
- tex_lod->pred = instr->pred;
- tex_lod->fetch.opc = TEX_SET_TEX_LOD;
- tex_lod->src[0] = instr->src[1];
- tex_lod->src_count = 1;
-
- sched[1] = sched[0];
- sched->instr = tex_lod;
- ctx->instr_sched_count++;
- }
-
- bool free_block = true;
- ir2_foreach_instr(instr, ctx)
- free_block &= instr->block_idx != block_idx;
- if (free_block)
- ra_block_free(ctx, block_idx);
- };
- ctx->instr_sched_count--;
+ struct ir2_sched_instr *sched;
+ int block_idx;
+
+ /* allocate input registers */
+ for (unsigned idx = 0; idx < ARRAY_SIZE(ctx->input); idx++)
+ if (ctx->input[idx].initialized)
+ ra_reg(ctx, &ctx->input[idx], idx, false, 0);
+
+ for (;;) {
+ sched = &ctx->instr_sched[ctx->instr_sched_count++];
+ block_idx = sched_next(ctx, sched);
+ if (block_idx < 0)
+ break;
+ memcpy(sched->reg_state, ctx->reg_state, sizeof(ctx->reg_state));
+
+ /* catch texture fetch after scheduling and insert the
+ * SET_TEX_LOD right before it if necessary
+ * TODO clean this up
+ */
+ struct ir2_instr *instr = sched->instr, *tex_lod;
+ if (instr && instr->type == IR2_FETCH && instr->fetch.opc == TEX_FETCH &&
+ instr->src_count == 2) {
+ /* generate the SET_LOD instruction */
+ tex_lod = &ctx->instr[ctx->instr_count++];
+ tex_lod->type = IR2_FETCH;
+ tex_lod->block_idx = instr->block_idx;
+ tex_lod->pred = instr->pred;
+ tex_lod->fetch.opc = TEX_SET_TEX_LOD;
+ tex_lod->src[0] = instr->src[1];
+ tex_lod->src_count = 1;
+
+ sched[1] = sched[0];
+ sched->instr = tex_lod;
+ ctx->instr_sched_count++;
+ }
+
+ bool free_block = true;
+ ir2_foreach_instr(instr, ctx) free_block &= instr->block_idx != block_idx;
+ if (free_block)
+ ra_block_free(ctx, block_idx);
+ };
+ ctx->instr_sched_count--;
}
void
ir2_compile(struct fd2_shader_stateobj *so, unsigned variant,
- struct fd2_shader_stateobj *fp)
+ struct fd2_shader_stateobj *fp)
{
- struct ir2_context ctx = { };
- bool binning = !fp && so->type == MESA_SHADER_VERTEX;
+ struct ir2_context ctx = {};
+ bool binning = !fp && so->type == MESA_SHADER_VERTEX;
- if (fp)
- so->variant[variant].f = fp->variant[0].f;
+ if (fp)
+ so->variant[variant].f = fp->variant[0].f;
- ctx.so = so;
- ctx.info = &so->variant[variant].info;
- ctx.f = &so->variant[variant].f;
- ctx.info->max_reg = -1;
+ ctx.so = so;
+ ctx.info = &so->variant[variant].info;
+ ctx.f = &so->variant[variant].f;
+ ctx.info->max_reg = -1;
- /* convert nir to internal representation */
- ir2_nir_compile(&ctx, binning);
+ /* convert nir to internal representation */
+ ir2_nir_compile(&ctx, binning);
- /* copy propagate srcs */
- cp_src(&ctx);
+ /* copy propagate srcs */
+ cp_src(&ctx);
- /* get ref_counts and kill non-needed instructions */
- ra_count_refs(&ctx);
+ /* get ref_counts and kill non-needed instructions */
+ ra_count_refs(&ctx);
- /* remove movs used to write outputs */
- cp_export(&ctx);
+ /* remove movs used to write outputs */
+ cp_export(&ctx);
- /* instruction order.. and vector->scalar conversions */
- schedule_instrs(&ctx);
+ /* instruction order.. and vector->scalar conversions */
+ schedule_instrs(&ctx);
- /* finally, assemble to bitcode */
- assemble(&ctx, binning);
+ /* finally, assemble to bitcode */
+ assemble(&ctx, binning);
}
#include "pipe/p_context.h"
struct ir2_fetch_info {
- /* dword offset of the fetch instruction */
- uint16_t offset;
- union {
- /* swizzle to merge with tgsi swizzle */
- struct {
- uint16_t dst_swiz;
- } vtx;
- /* sampler id to patch const_idx */
- struct {
- uint16_t samp_id;
- uint8_t src_swiz;
- } tex;
- };
+ /* dword offset of the fetch instruction */
+ uint16_t offset;
+ union {
+ /* swizzle to merge with tgsi swizzle */
+ struct {
+ uint16_t dst_swiz;
+ } vtx;
+ /* sampler id to patch const_idx */
+ struct {
+ uint16_t samp_id;
+ uint8_t src_swiz;
+ } tex;
+ };
};
struct ir2_shader_info {
- /* compiler shader */
- uint32_t *dwords;
+ /* compiler shader */
+ uint32_t *dwords;
- /* size of the compiled shader in dwords */
- uint16_t sizedwords;
+ /* size of the compiled shader in dwords */
+ uint16_t sizedwords;
- /* highest GPR # used by shader */
- int8_t max_reg;
+ /* highest GPR # used by shader */
+ int8_t max_reg;
- /* offset in dwords of first MEMORY export CF (for a20x hw binning) */
- int16_t mem_export_ptr;
+ /* offset in dwords of first MEMORY export CF (for a20x hw binning) */
+ int16_t mem_export_ptr;
- /* fetch instruction info for patching */
- uint16_t num_fetch_instrs;
- struct ir2_fetch_info fetch_info[64];
+ /* fetch instruction info for patching */
+ uint16_t num_fetch_instrs;
+ struct ir2_fetch_info fetch_info[64];
};
struct ir2_frag_linkage {
- unsigned inputs_count;
- struct {
- uint8_t slot;
- uint8_t ncomp;
- } inputs[16];
-
- /* driver_location of fragcoord.zw, -1 if not used */
- int fragcoord;
+ unsigned inputs_count;
+ struct {
+ uint8_t slot;
+ uint8_t ncomp;
+ } inputs[16];
+
+ /* driver_location of fragcoord.zw, -1 if not used */
+ int fragcoord;
};
struct ir2_shader_variant {
- struct ir2_shader_info info;
- struct ir2_frag_linkage f;
+ struct ir2_shader_info info;
+ struct ir2_frag_linkage f;
};
struct fd2_shader_stateobj;
struct tgsi_token;
void ir2_compile(struct fd2_shader_stateobj *so, unsigned variant,
- struct fd2_shader_stateobj *fp);
+ struct fd2_shader_stateobj *fp);
struct nir_shader *ir2_tgsi_to_nir(const struct tgsi_token *tokens,
- struct pipe_screen *screen);
+ struct pipe_screen *screen);
const nir_shader_compiler_options *ir2_get_compiler_options(void);
int ir2_optimize_nir(nir_shader *s, bool lower);
-#endif /* IR2_H_ */
+#endif /* IR2_H_ */
static unsigned
src_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp)
{
- struct ir2_reg_component *comps;
- unsigned swiz = 0;
-
- switch (src->type) {
- case IR2_SRC_SSA:
- case IR2_SRC_REG:
- break;
- default:
- return src->swizzle;
- }
- /* we need to take into account where the components were allocated */
- comps = get_reg_src(ctx, src)->comp;
- for (int i = 0; i < ncomp; i++) {
- swiz |= swiz_set(comps[swiz_get(src->swizzle, i)].c, i);
- }
- return swiz;
+ struct ir2_reg_component *comps;
+ unsigned swiz = 0;
+
+ switch (src->type) {
+ case IR2_SRC_SSA:
+ case IR2_SRC_REG:
+ break;
+ default:
+ return src->swizzle;
+ }
+ /* we need to take into account where the components were allocated */
+ comps = get_reg_src(ctx, src)->comp;
+ for (int i = 0; i < ncomp; i++) {
+ swiz |= swiz_set(comps[swiz_get(src->swizzle, i)].c, i);
+ }
+ return swiz;
}
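/* A minimal standalone sketch of what src_swizzle() above computes,
 * assuming a plain absolute 2-bit-per-channel packing; swiz_get_m() and
 * swiz_set_m() are illustrative stand-ins (the real swiz_get()/swiz_set()
 * may encode the fields differently), but the remapping logic is the same:
 * the source's logical swizzle is routed through the channels the
 * allocator picked for each component.
 */
#include <assert.h>

static unsigned swiz_get_m(unsigned swiz, unsigned i) { return swiz >> i * 2 & 3; }
static unsigned swiz_set_m(unsigned c, unsigned i) { return c << i * 2; }

int main(void)
{
   unsigned comp_c[2] = {2, 3}; /* components allocated to channels z, w */
   unsigned src_swiz = swiz_set_m(1, 0) | swiz_set_m(0, 1); /* .yx */
   unsigned swiz = 0;

   for (unsigned i = 0; i < 2; i++)
      swiz |= swiz_set_m(comp_c[swiz_get_m(src_swiz, i)], i);

   /* the read ends up as .wz: logical y lives in w, logical x in z */
   assert(swiz_get_m(swiz, 0) == 3 && swiz_get_m(swiz, 1) == 2);
   return 0;
}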
/* ALU instrs need to take into account how the output components are allocated */
static unsigned
alu_swizzle_scalar(struct ir2_context *ctx, struct ir2_src *reg)
{
- /* hardware seems to take from W, but swizzle everywhere just in case */
- return swiz_merge(src_swizzle(ctx, reg, 1), IR2_SWIZZLE_XXXX);
+ /* hardware seems to take from W, but swizzle everywhere just in case */
+ return swiz_merge(src_swizzle(ctx, reg, 1), IR2_SWIZZLE_XXXX);
}
static unsigned
-alu_swizzle(struct ir2_context *ctx, struct ir2_instr *instr, struct ir2_src *src)
+alu_swizzle(struct ir2_context *ctx, struct ir2_instr *instr,
+ struct ir2_src *src)
{
- struct ir2_reg_component *comp = get_reg(instr)->comp;
- unsigned swiz0 = src_swizzle(ctx, src, src_ncomp(instr));
- unsigned swiz = 0;
-
- /* non per component special cases */
- switch (instr->alu.vector_opc) {
- case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
- return alu_swizzle_scalar(ctx, src);
- case DOT2ADDv:
- case DOT3v:
- case DOT4v:
- case CUBEv:
- return swiz0;
- default:
- break;
- }
-
- for (int i = 0, j = 0; i < dst_ncomp(instr); j++) {
- if (instr->alu.write_mask & 1 << j) {
- if (comp[j].c != 7)
- swiz |= swiz_set(i, comp[j].c);
- i++;
- }
- }
- return swiz_merge(swiz0, swiz);
+ struct ir2_reg_component *comp = get_reg(instr)->comp;
+ unsigned swiz0 = src_swizzle(ctx, src, src_ncomp(instr));
+ unsigned swiz = 0;
+
+ /* non per component special cases */
+ switch (instr->alu.vector_opc) {
+ case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
+ return alu_swizzle_scalar(ctx, src);
+ case DOT2ADDv:
+ case DOT3v:
+ case DOT4v:
+ case CUBEv:
+ return swiz0;
+ default:
+ break;
+ }
+
+ for (int i = 0, j = 0; i < dst_ncomp(instr); j++) {
+ if (instr->alu.write_mask & 1 << j) {
+ if (comp[j].c != 7)
+ swiz |= swiz_set(i, comp[j].c);
+ i++;
+ }
+ }
+ return swiz_merge(swiz0, swiz);
}
static unsigned
alu_swizzle_scalar2(struct ir2_context *ctx, struct ir2_src *src, unsigned s1)
{
- /* hardware seems to take from ZW, but swizzle everywhere (ABAB) */
- unsigned s0 = swiz_get(src_swizzle(ctx, src, 1), 0);
- return swiz_merge(swiz_set(s0, 0) | swiz_set(s1, 1), IR2_SWIZZLE_XYXY);
+ /* hardware seems to take from ZW, but swizzle everywhere (ABAB) */
+ unsigned s0 = swiz_get(src_swizzle(ctx, src, 1), 0);
+ return swiz_merge(swiz_set(s0, 0) | swiz_set(s1, 1), IR2_SWIZZLE_XYXY);
}
/* write_mask needs to be transformed by allocation information */
static unsigned
alu_write_mask(struct ir2_context *ctx, struct ir2_instr *instr)
{
- struct ir2_reg_component *comp = get_reg(instr)->comp;
- unsigned write_mask = 0;
+ struct ir2_reg_component *comp = get_reg(instr)->comp;
+ unsigned write_mask = 0;
- for (int i = 0; i < 4; i++) {
- if (instr->alu.write_mask & 1 << i)
- write_mask |= 1 << comp[i].c;
- }
+ for (int i = 0; i < 4; i++) {
+ if (instr->alu.write_mask & 1 << i)
+ write_mask |= 1 << comp[i].c;
+ }
- return write_mask;
+ return write_mask;
}
/* fetch instructions can swizzle dest, but src swizzle needs conversion */
static unsigned
fetch_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp)
{
- unsigned alu_swiz = src_swizzle(ctx, src, ncomp);
- unsigned swiz = 0;
- for (int i = 0; i < ncomp; i++)
- swiz |= swiz_get(alu_swiz, i) << i * 2;
- return swiz;
+ unsigned alu_swiz = src_swizzle(ctx, src, ncomp);
+ unsigned swiz = 0;
+ for (int i = 0; i < ncomp; i++)
+ swiz |= swiz_get(alu_swiz, i) << i * 2;
+ return swiz;
}
static unsigned
fetch_dst_swiz(struct ir2_context *ctx, struct ir2_instr *instr)
{
- struct ir2_reg_component *comp = get_reg(instr)->comp;
- unsigned dst_swiz = 0xfff;
- for (int i = 0; i < dst_ncomp(instr); i++) {
- dst_swiz &= ~(7 << comp[i].c * 3);
- dst_swiz |= i << comp[i].c * 3;
- }
- return dst_swiz;
+ struct ir2_reg_component *comp = get_reg(instr)->comp;
+ unsigned dst_swiz = 0xfff;
+ for (int i = 0; i < dst_ncomp(instr); i++) {
+ dst_swiz &= ~(7 << comp[i].c * 3);
+ dst_swiz |= i << comp[i].c * 3;
+ }
+ return dst_swiz;
}
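/* A worked example of fetch_dst_swiz() above, with hypothetical allocation
 * values. Fetch destination swizzles carry three bits per channel, and 7
 * appears to mean "leave the channel unwritten" (the 0xfff default and the
 * TEX_SET_TEX_LOD case below both suggest this reading).
 */
#include <assert.h>

int main(void)
{
   unsigned comp_c[2] = {1, 2}; /* 2-comp result allocated to channels y, z */
   unsigned dst_swiz = 0xfff;   /* (7,7,7,7): every channel masked */

   for (unsigned i = 0; i < 2; i++) {
      dst_swiz &= ~(7 << comp_c[i] * 3);
      dst_swiz |= i << comp_c[i] * 3;
   }

   /* fields (x,y,z,w) = (7,0,1,7): y/z receive results 0/1, x/w stay masked */
   assert(dst_swiz == 0xe47);
   return 0;
}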
/* register / export # for instr */
static unsigned
dst_to_reg(struct ir2_context *ctx, struct ir2_instr *instr)
{
- if (is_export(instr))
- return instr->alu.export;
+ if (is_export(instr))
+ return instr->alu.export;
- return get_reg(instr)->idx;
+ return get_reg(instr)->idx;
}
/* register # for src */
-static unsigned src_to_reg(struct ir2_context *ctx, struct ir2_src *src)
+static unsigned
+src_to_reg(struct ir2_context *ctx, struct ir2_src *src)
{
- return get_reg_src(ctx, src)->idx;
+ return get_reg_src(ctx, src)->idx;
}
-static unsigned src_reg_byte(struct ir2_context *ctx, struct ir2_src *src)
+static unsigned
+src_reg_byte(struct ir2_context *ctx, struct ir2_src *src)
{
- if (src->type == IR2_SRC_CONST) {
- assert(!src->abs); /* no abs bit for const */
- return src->num;
- }
- return src_to_reg(ctx, src) | (src->abs ? 0x80 : 0);
+ if (src->type == IR2_SRC_CONST) {
+ assert(!src->abs); /* no abs bit for const */
+ return src->num;
+ }
+ return src_to_reg(ctx, src) | (src->abs ? 0x80 : 0);
}
/* produce the 12 byte binary instruction for a given sched_instr */
static void
-fill_instr(struct ir2_context *ctx, struct ir2_sched_instr *sched,
- instr_t *bc, bool * is_fetch)
+fill_instr(struct ir2_context *ctx, struct ir2_sched_instr *sched, instr_t *bc,
+ bool *is_fetch)
{
- struct ir2_instr *instr = sched->instr, *instr_s, *instr_v;
-
- *bc = (instr_t) {};
-
- if (instr && instr->type == IR2_FETCH) {
- *is_fetch = true;
-
- bc->fetch.opc = instr->fetch.opc;
- bc->fetch.pred_select = !!instr->pred;
- bc->fetch.pred_condition = instr->pred & 1;
-
- struct ir2_src *src = instr->src;
-
- if (instr->fetch.opc == VTX_FETCH) {
- instr_fetch_vtx_t *vtx = &bc->fetch.vtx;
-
- assert(instr->fetch.vtx.const_idx <= 0x1f);
- assert(instr->fetch.vtx.const_idx_sel <= 0x3);
-
- vtx->src_reg = src_to_reg(ctx, src);
- vtx->src_swiz = fetch_swizzle(ctx, src, 1);
- vtx->dst_reg = dst_to_reg(ctx, instr);
- vtx->dst_swiz = fetch_dst_swiz(ctx, instr);
-
- vtx->must_be_one = 1;
- vtx->const_index = instr->fetch.vtx.const_idx;
- vtx->const_index_sel = instr->fetch.vtx.const_idx_sel;
-
- /* other fields will be patched */
-
- /* XXX seems like every FETCH but the first has
- * this bit set:
- */
- vtx->reserved3 = instr->idx ? 0x1 : 0x0;
- vtx->reserved0 = instr->idx ? 0x2 : 0x3;
- } else if (instr->fetch.opc == TEX_FETCH) {
- instr_fetch_tex_t *tex = &bc->fetch.tex;
-
- tex->src_reg = src_to_reg(ctx, src);
- tex->src_swiz = fetch_swizzle(ctx, src, 3);
- tex->dst_reg = dst_to_reg(ctx, instr);
- tex->dst_swiz = fetch_dst_swiz(ctx, instr);
- /* tex->const_idx = patch_fetches */
- tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
- tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
- tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->use_comp_lod = ctx->so->type == MESA_SHADER_FRAGMENT;
- tex->use_reg_lod = instr->src_count == 2;
- tex->sample_location = SAMPLE_CENTER;
- tex->tx_coord_denorm = instr->fetch.tex.is_rect;
- } else if (instr->fetch.opc == TEX_SET_TEX_LOD) {
- instr_fetch_tex_t *tex = &bc->fetch.tex;
-
- tex->src_reg = src_to_reg(ctx, src);
- tex->src_swiz = fetch_swizzle(ctx, src, 1);
- tex->dst_reg = 0;
- tex->dst_swiz = 0xfff;
-
- tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
- tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
- tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
- tex->use_comp_lod = 1;
- tex->use_reg_lod = 0;
- tex->sample_location = SAMPLE_CENTER;
- } else {
- assert(0);
- }
- return;
- }
-
- instr_v = sched->instr;
- instr_s = sched->instr_s;
-
- if (instr_v) {
- struct ir2_src src1, src2, *src3;
-
- src1 = instr_v->src[0];
- src2 = instr_v->src[instr_v->src_count > 1];
- src3 = instr_v->src_count == 3 ? &instr_v->src[2] : NULL;
-
- bc->alu.vector_opc = instr_v->alu.vector_opc;
- bc->alu.vector_write_mask = alu_write_mask(ctx, instr_v);
- bc->alu.vector_dest = dst_to_reg(ctx, instr_v);
- bc->alu.vector_clamp = instr_v->alu.saturate;
- bc->alu.export_data = instr_v->alu.export >= 0;
-
- /* single operand SETEv, use 0.0f as src2 */
- if (instr_v->src_count == 1 &&
- (bc->alu.vector_opc == SETEv ||
- bc->alu.vector_opc == SETNEv ||
- bc->alu.vector_opc == SETGTv ||
- bc->alu.vector_opc == SETGTEv))
- src2 = ir2_zero(ctx);
-
- /* export32 instr for a20x hw binning has this bit set..
- * it seems to do more than change the base address of constants
- * XXX this is a hack
- */
- bc->alu.relative_addr =
- (bc->alu.export_data && bc->alu.vector_dest == 32);
-
- bc->alu.src1_reg_byte = src_reg_byte(ctx, &src1);
- bc->alu.src1_swiz = alu_swizzle(ctx, instr_v, &src1);
- bc->alu.src1_reg_negate = src1.negate;
- bc->alu.src1_sel = src1.type != IR2_SRC_CONST;
-
- bc->alu.src2_reg_byte = src_reg_byte(ctx, &src2);
- bc->alu.src2_swiz = alu_swizzle(ctx, instr_v, &src2);
- bc->alu.src2_reg_negate = src2.negate;
- bc->alu.src2_sel = src2.type != IR2_SRC_CONST;
-
- if (src3) {
- bc->alu.src3_reg_byte = src_reg_byte(ctx, src3);
- bc->alu.src3_swiz = alu_swizzle(ctx, instr_v, src3);
- bc->alu.src3_reg_negate = src3->negate;
- bc->alu.src3_sel = src3->type != IR2_SRC_CONST;
- }
-
- bc->alu.pred_select = instr_v->pred;
- }
-
- if (instr_s) {
- struct ir2_src *src = instr_s->src;
-
- bc->alu.scalar_opc = instr_s->alu.scalar_opc;
- bc->alu.scalar_write_mask = alu_write_mask(ctx, instr_s);
- bc->alu.scalar_dest = dst_to_reg(ctx, instr_s);
- bc->alu.scalar_clamp = instr_s->alu.saturate;
- bc->alu.export_data = instr_s->alu.export >= 0;
-
- if (instr_s->src_count == 1) {
- bc->alu.src3_reg_byte = src_reg_byte(ctx, src);
- bc->alu.src3_swiz = alu_swizzle_scalar(ctx, src);
- bc->alu.src3_reg_negate = src->negate;
- bc->alu.src3_sel = src->type != IR2_SRC_CONST;
- } else {
- assert(instr_s->src_count == 2);
-
- bc->alu.src3_reg_byte = src_reg_byte(ctx, src);
- bc->alu.src3_swiz = alu_swizzle_scalar2(ctx, src, instr_s->alu.src1_swizzle);
- bc->alu.src3_reg_negate = src->negate;
- bc->alu.src3_sel = src->type != IR2_SRC_CONST;;
- }
-
- if (instr_v)
- assert(instr_s->pred == instr_v->pred);
- bc->alu.pred_select = instr_s->pred;
- }
-
- *is_fetch = false;
- return;
+ struct ir2_instr *instr = sched->instr, *instr_s, *instr_v;
+
+ *bc = (instr_t){};
+
+ if (instr && instr->type == IR2_FETCH) {
+ *is_fetch = true;
+
+ bc->fetch.opc = instr->fetch.opc;
+ bc->fetch.pred_select = !!instr->pred;
+ bc->fetch.pred_condition = instr->pred & 1;
+
+ struct ir2_src *src = instr->src;
+
+ if (instr->fetch.opc == VTX_FETCH) {
+ instr_fetch_vtx_t *vtx = &bc->fetch.vtx;
+
+ assert(instr->fetch.vtx.const_idx <= 0x1f);
+ assert(instr->fetch.vtx.const_idx_sel <= 0x3);
+
+ vtx->src_reg = src_to_reg(ctx, src);
+ vtx->src_swiz = fetch_swizzle(ctx, src, 1);
+ vtx->dst_reg = dst_to_reg(ctx, instr);
+ vtx->dst_swiz = fetch_dst_swiz(ctx, instr);
+
+ vtx->must_be_one = 1;
+ vtx->const_index = instr->fetch.vtx.const_idx;
+ vtx->const_index_sel = instr->fetch.vtx.const_idx_sel;
+
+ /* other fields will be patched */
+
+ /* XXX seems like every FETCH but the first has
+ * this bit set:
+ */
+ vtx->reserved3 = instr->idx ? 0x1 : 0x0;
+ vtx->reserved0 = instr->idx ? 0x2 : 0x3;
+ } else if (instr->fetch.opc == TEX_FETCH) {
+ instr_fetch_tex_t *tex = &bc->fetch.tex;
+
+ tex->src_reg = src_to_reg(ctx, src);
+ tex->src_swiz = fetch_swizzle(ctx, src, 3);
+ tex->dst_reg = dst_to_reg(ctx, instr);
+ tex->dst_swiz = fetch_dst_swiz(ctx, instr);
+ /* tex->const_idx = patch_fetches */
+ tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
+ tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
+ tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
+ tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
+ tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
+ tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
+ tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
+ tex->use_comp_lod = ctx->so->type == MESA_SHADER_FRAGMENT;
+ tex->use_reg_lod = instr->src_count == 2;
+ tex->sample_location = SAMPLE_CENTER;
+ tex->tx_coord_denorm = instr->fetch.tex.is_rect;
+ } else if (instr->fetch.opc == TEX_SET_TEX_LOD) {
+ instr_fetch_tex_t *tex = &bc->fetch.tex;
+
+ tex->src_reg = src_to_reg(ctx, src);
+ tex->src_swiz = fetch_swizzle(ctx, src, 1);
+ tex->dst_reg = 0;
+ tex->dst_swiz = 0xfff;
+
+ tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
+ tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
+ tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
+ tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
+ tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
+ tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
+ tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
+ tex->use_comp_lod = 1;
+ tex->use_reg_lod = 0;
+ tex->sample_location = SAMPLE_CENTER;
+ } else {
+ assert(0);
+ }
+ return;
+ }
+
+ instr_v = sched->instr;
+ instr_s = sched->instr_s;
+
+ if (instr_v) {
+ struct ir2_src src1, src2, *src3;
+
+ src1 = instr_v->src[0];
+ src2 = instr_v->src[instr_v->src_count > 1];
+ src3 = instr_v->src_count == 3 ? &instr_v->src[2] : NULL;
+
+ bc->alu.vector_opc = instr_v->alu.vector_opc;
+ bc->alu.vector_write_mask = alu_write_mask(ctx, instr_v);
+ bc->alu.vector_dest = dst_to_reg(ctx, instr_v);
+ bc->alu.vector_clamp = instr_v->alu.saturate;
+ bc->alu.export_data = instr_v->alu.export >= 0;
+
+ /* single operand SETEv, use 0.0f as src2 */
+ if (instr_v->src_count == 1 &&
+ (bc->alu.vector_opc == SETEv || bc->alu.vector_opc == SETNEv ||
+ bc->alu.vector_opc == SETGTv || bc->alu.vector_opc == SETGTEv))
+ src2 = ir2_zero(ctx);
+
+      /* the export32 instr for a20x hw binning has this bit set;
+       * it seems to do more than change the base address of constants.
+       * XXX this is a hack
+       */
+ bc->alu.relative_addr =
+ (bc->alu.export_data && bc->alu.vector_dest == 32);
+
+ bc->alu.src1_reg_byte = src_reg_byte(ctx, &src1);
+ bc->alu.src1_swiz = alu_swizzle(ctx, instr_v, &src1);
+ bc->alu.src1_reg_negate = src1.negate;
+ bc->alu.src1_sel = src1.type != IR2_SRC_CONST;
+
+ bc->alu.src2_reg_byte = src_reg_byte(ctx, &src2);
+ bc->alu.src2_swiz = alu_swizzle(ctx, instr_v, &src2);
+ bc->alu.src2_reg_negate = src2.negate;
+ bc->alu.src2_sel = src2.type != IR2_SRC_CONST;
+
+ if (src3) {
+ bc->alu.src3_reg_byte = src_reg_byte(ctx, src3);
+ bc->alu.src3_swiz = alu_swizzle(ctx, instr_v, src3);
+ bc->alu.src3_reg_negate = src3->negate;
+ bc->alu.src3_sel = src3->type != IR2_SRC_CONST;
+ }
+
+ bc->alu.pred_select = instr_v->pred;
+ }
+
+ if (instr_s) {
+ struct ir2_src *src = instr_s->src;
+
+ bc->alu.scalar_opc = instr_s->alu.scalar_opc;
+ bc->alu.scalar_write_mask = alu_write_mask(ctx, instr_s);
+ bc->alu.scalar_dest = dst_to_reg(ctx, instr_s);
+ bc->alu.scalar_clamp = instr_s->alu.saturate;
+ bc->alu.export_data = instr_s->alu.export >= 0;
+
+ if (instr_s->src_count == 1) {
+ bc->alu.src3_reg_byte = src_reg_byte(ctx, src);
+ bc->alu.src3_swiz = alu_swizzle_scalar(ctx, src);
+ bc->alu.src3_reg_negate = src->negate;
+ bc->alu.src3_sel = src->type != IR2_SRC_CONST;
+ } else {
+ assert(instr_s->src_count == 2);
+
+ bc->alu.src3_reg_byte = src_reg_byte(ctx, src);
+ bc->alu.src3_swiz =
+ alu_swizzle_scalar2(ctx, src, instr_s->alu.src1_swizzle);
+ bc->alu.src3_reg_negate = src->negate;
+ bc->alu.src3_sel = src->type != IR2_SRC_CONST;
+ }
+
+ if (instr_v)
+ assert(instr_s->pred == instr_v->pred);
+ bc->alu.pred_select = instr_s->pred;
+ }
+
+ *is_fetch = false;
+ return;
}
static unsigned
-write_cfs(struct ir2_context *ctx, instr_cf_t * cfs, unsigned cf_idx,
- instr_cf_alloc_t *alloc, instr_cf_exec_t *exec)
+write_cfs(struct ir2_context *ctx, instr_cf_t *cfs, unsigned cf_idx,
+ instr_cf_alloc_t *alloc, instr_cf_exec_t *exec)
{
- assert(exec->count);
+ assert(exec->count);
- if (alloc)
- cfs[cf_idx++].alloc = *alloc;
+ if (alloc)
+ cfs[cf_idx++].alloc = *alloc;
- /* for memory alloc offset for patching */
- if (alloc && alloc->buffer_select == SQ_MEMORY &&
- ctx->info->mem_export_ptr == -1)
- ctx->info->mem_export_ptr = cf_idx / 2 * 3;
+   /* record the memory alloc CF offset for later patching */
+ if (alloc && alloc->buffer_select == SQ_MEMORY &&
+ ctx->info->mem_export_ptr == -1)
+ ctx->info->mem_export_ptr = cf_idx / 2 * 3;
- cfs[cf_idx++].exec = *exec;
- exec->address += exec->count;
- exec->serialize = 0;
- exec->count = 0;
+ cfs[cf_idx++].exec = *exec;
+ exec->address += exec->count;
+ exec->serialize = 0;
+ exec->count = 0;
- return cf_idx;
+ return cf_idx;
}
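/* The dword math in write_cfs()/assemble() in one place: CF instructions
 * pack two per three dwords (48 bits each, as the num_cf / 2 * 3 terms
 * imply), while ALU/fetch instructions take three dwords apiece. A sketch
 * with hypothetical counts:
 */
#include <assert.h>

int main(void)
{
   unsigned num_cf = 6, num_instr = 10;
   unsigned cfdwords = num_cf / 2 * 3;      /* 9: two CFs per 3 dwords */
   unsigned alufetchdwords = num_instr * 3; /* 30: 3 dwords per instr */

   assert(cfdwords + alufetchdwords == 39); /* total shader size in dwords */
   return 0;
}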
/* assemble the final shader */
-void assemble(struct ir2_context *ctx, bool binning)
+void
+assemble(struct ir2_context *ctx, bool binning)
{
- /* hw seems to have a limit of 384 (num_cf/2+num_instr <= 384)
- * address is 9 bits so could it be 512 ?
- */
- instr_cf_t cfs[384];
- instr_t bytecode[384], bc;
- unsigned block_addr[128];
- unsigned num_cf = 0;
-
- /* CF instr state */
- instr_cf_exec_t exec = {.opc = EXEC};
- instr_cf_alloc_t alloc = {.opc = ALLOC};
-
- int sync_id, sync_id_prev = -1;
- bool is_fetch = false;
- bool need_sync = true;
- bool need_alloc = false;
- unsigned block_idx = 0;
-
- ctx->info->mem_export_ptr = -1;
- ctx->info->num_fetch_instrs = 0;
-
- /* vertex shader always needs to allocate at least one parameter
- * if it will never happen,
- */
- if (ctx->so->type == MESA_SHADER_VERTEX && ctx->f->inputs_count == 0) {
- alloc.buffer_select = SQ_PARAMETER_PIXEL;
- cfs[num_cf++].alloc = alloc;
- }
-
- block_addr[0] = 0;
-
- for (int i = 0, j = 0; j < ctx->instr_sched_count; j++) {
- struct ir2_instr *instr = ctx->instr_sched[j].instr;
-
- /* catch IR2_CF since it isn't a regular instruction */
- if (instr && instr->type == IR2_CF) {
- assert(!need_alloc); /* XXX */
-
- /* flush any exec cf before inserting jmp */
- if (exec.count)
- num_cf = write_cfs(ctx, cfs, num_cf, NULL, &exec);
-
- cfs[num_cf++].jmp_call = (instr_cf_jmp_call_t) {
- .opc = COND_JMP,
- .address = instr->cf.block_idx, /* will be fixed later */
- .force_call = !instr->pred,
- .predicated_jmp = 1,
- .direction = instr->cf.block_idx > instr->block_idx,
- .condition = instr->pred & 1,
- };
- continue;
- }
-
- /* fill the 3 dwords for the instruction */
- fill_instr(ctx, &ctx->instr_sched[j], &bc, &is_fetch);
-
- /* we need to sync between ALU/VTX_FETCH/TEX_FETCH types */
- sync_id = 0;
- if (is_fetch)
- sync_id = bc.fetch.opc == VTX_FETCH ? 1 : 2;
-
- need_sync = sync_id != sync_id_prev;
- sync_id_prev = sync_id;
-
- unsigned block;
- {
-
- if (ctx->instr_sched[j].instr)
- block = ctx->instr_sched[j].instr->block_idx;
- else
- block = ctx->instr_sched[j].instr_s->block_idx;
-
- assert(block_idx <= block);
- }
-
- /* info for patching */
- if (is_fetch) {
- struct ir2_fetch_info *info =
- &ctx->info->fetch_info[ctx->info->num_fetch_instrs++];
- info->offset = i * 3; /* add cf offset later */
-
- if (bc.fetch.opc == VTX_FETCH) {
- info->vtx.dst_swiz = bc.fetch.vtx.dst_swiz;
- } else if (bc.fetch.opc == TEX_FETCH) {
- info->tex.samp_id = instr->fetch.tex.samp_id;
- info->tex.src_swiz = bc.fetch.tex.src_swiz;
- } else {
- ctx->info->num_fetch_instrs--;
- }
- }
-
- /* exec cf after 6 instr or when switching between fetch / alu */
- if (exec.count == 6 || (exec.count && (need_sync || block != block_idx))) {
- num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
- need_alloc = false;
- }
-
- /* update block_addrs for jmp patching */
- while (block_idx < block)
- block_addr[++block_idx] = num_cf;
-
- /* export - fill alloc cf */
- if (!is_fetch && bc.alu.export_data) {
- /* get the export buffer from either vector/scalar dest */
- instr_alloc_type_t buffer =
- export_buf(bc.alu.vector_dest);
- if (bc.alu.scalar_write_mask) {
- if (bc.alu.vector_write_mask)
- assert(buffer == export_buf(bc.alu.scalar_dest));
- buffer = export_buf(bc.alu.scalar_dest);
- }
-
- /* flush previous alloc if the buffer changes */
- bool need_new_alloc = buffer != alloc.buffer_select;
-
- /* memory export always in 32/33 pair, new alloc on 32 */
- if (bc.alu.vector_dest == 32)
- need_new_alloc = true;
-
- if (need_new_alloc && exec.count) {
- num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
- need_alloc = false;
- }
-
- need_alloc |= need_new_alloc;
-
- alloc.size = 0;
- alloc.buffer_select = buffer;
-
- if (buffer == SQ_PARAMETER_PIXEL && ctx->so->type == MESA_SHADER_VERTEX)
- alloc.size = ctx->f->inputs_count - 1;
-
- if (buffer == SQ_POSITION)
- alloc.size = ctx->so->writes_psize;
- }
-
- if (is_fetch)
- exec.serialize |= 0x1 << exec.count * 2;
- if (need_sync)
- exec.serialize |= 0x2 << exec.count * 2;
-
- need_sync = false;
- exec.count += 1;
- bytecode[i++] = bc;
- }
-
- /* final exec cf */
- exec.opc = EXEC_END;
- num_cf =
- write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
-
- /* insert nop to get an even # of CFs */
- if (num_cf % 2)
- cfs[num_cf++] = (instr_cf_t) {
- .opc = NOP};
-
- /* patch cf addrs */
- for (int idx = 0; idx < num_cf; idx++) {
- switch (cfs[idx].opc) {
- case NOP:
- case ALLOC:
- break;
- case EXEC:
- case EXEC_END:
- cfs[idx].exec.address += num_cf / 2;
- break;
- case COND_JMP:
- cfs[idx].jmp_call.address = block_addr[cfs[idx].jmp_call.address];
- break;
- default:
- assert(0);
- }
- }
-
- /* concatenate cfs and alu/fetch */
- uint32_t cfdwords = num_cf / 2 * 3;
- uint32_t alufetchdwords = exec.address * 3;
- uint32_t sizedwords = cfdwords + alufetchdwords;
- uint32_t *dwords = malloc(sizedwords * 4);
- assert(dwords);
- memcpy(dwords, cfs, cfdwords * 4);
- memcpy(&dwords[cfdwords], bytecode, alufetchdwords * 4);
-
- /* finalize ir2_shader_info */
- ctx->info->dwords = dwords;
- ctx->info->sizedwords = sizedwords;
- for (int i = 0; i < ctx->info->num_fetch_instrs; i++)
- ctx->info->fetch_info[i].offset += cfdwords;
-
- if (FD_DBG(DISASM)) {
- DBG("disassemble: type=%d", ctx->so->type);
- disasm_a2xx(dwords, sizedwords, 0, ctx->so->type);
- }
+   /* hw seems to have a limit of 384 (num_cf/2 + num_instr <= 384);
+    * the address field is 9 bits, so could the limit be 512?
+    */
+ instr_cf_t cfs[384];
+ instr_t bytecode[384], bc;
+ unsigned block_addr[128];
+ unsigned num_cf = 0;
+
+ /* CF instr state */
+ instr_cf_exec_t exec = {.opc = EXEC};
+ instr_cf_alloc_t alloc = {.opc = ALLOC};
+
+ int sync_id, sync_id_prev = -1;
+ bool is_fetch = false;
+ bool need_sync = true;
+ bool need_alloc = false;
+ unsigned block_idx = 0;
+
+ ctx->info->mem_export_ptr = -1;
+ ctx->info->num_fetch_instrs = 0;
+
+   /* the vertex shader always needs to allocate at least one parameter;
+    * if no export would otherwise trigger the alloc, emit it up front
+    */
+ if (ctx->so->type == MESA_SHADER_VERTEX && ctx->f->inputs_count == 0) {
+ alloc.buffer_select = SQ_PARAMETER_PIXEL;
+ cfs[num_cf++].alloc = alloc;
+ }
+
+ block_addr[0] = 0;
+
+ for (int i = 0, j = 0; j < ctx->instr_sched_count; j++) {
+ struct ir2_instr *instr = ctx->instr_sched[j].instr;
+
+ /* catch IR2_CF since it isn't a regular instruction */
+ if (instr && instr->type == IR2_CF) {
+ assert(!need_alloc); /* XXX */
+
+ /* flush any exec cf before inserting jmp */
+ if (exec.count)
+ num_cf = write_cfs(ctx, cfs, num_cf, NULL, &exec);
+
+ cfs[num_cf++].jmp_call = (instr_cf_jmp_call_t){
+ .opc = COND_JMP,
+ .address = instr->cf.block_idx, /* will be fixed later */
+ .force_call = !instr->pred,
+ .predicated_jmp = 1,
+ .direction = instr->cf.block_idx > instr->block_idx,
+ .condition = instr->pred & 1,
+ };
+ continue;
+ }
+
+ /* fill the 3 dwords for the instruction */
+ fill_instr(ctx, &ctx->instr_sched[j], &bc, &is_fetch);
+
+ /* we need to sync between ALU/VTX_FETCH/TEX_FETCH types */
+ sync_id = 0;
+ if (is_fetch)
+ sync_id = bc.fetch.opc == VTX_FETCH ? 1 : 2;
+
+ need_sync = sync_id != sync_id_prev;
+ sync_id_prev = sync_id;
+
+ unsigned block;
+ {
+ if (ctx->instr_sched[j].instr)
+ block = ctx->instr_sched[j].instr->block_idx;
+ else
+ block = ctx->instr_sched[j].instr_s->block_idx;
+
+ assert(block_idx <= block);
+ }
+
+ /* info for patching */
+ if (is_fetch) {
+ struct ir2_fetch_info *info =
+ &ctx->info->fetch_info[ctx->info->num_fetch_instrs++];
+ info->offset = i * 3; /* add cf offset later */
+
+ if (bc.fetch.opc == VTX_FETCH) {
+ info->vtx.dst_swiz = bc.fetch.vtx.dst_swiz;
+ } else if (bc.fetch.opc == TEX_FETCH) {
+ info->tex.samp_id = instr->fetch.tex.samp_id;
+ info->tex.src_swiz = bc.fetch.tex.src_swiz;
+ } else {
+ ctx->info->num_fetch_instrs--;
+ }
+ }
+
+ /* exec cf after 6 instr or when switching between fetch / alu */
+ if (exec.count == 6 ||
+ (exec.count && (need_sync || block != block_idx))) {
+ num_cf =
+ write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
+ need_alloc = false;
+ }
+
+ /* update block_addrs for jmp patching */
+ while (block_idx < block)
+ block_addr[++block_idx] = num_cf;
+
+ /* export - fill alloc cf */
+ if (!is_fetch && bc.alu.export_data) {
+ /* get the export buffer from either vector/scalar dest */
+ instr_alloc_type_t buffer = export_buf(bc.alu.vector_dest);
+ if (bc.alu.scalar_write_mask) {
+ if (bc.alu.vector_write_mask)
+ assert(buffer == export_buf(bc.alu.scalar_dest));
+ buffer = export_buf(bc.alu.scalar_dest);
+ }
+
+ /* flush previous alloc if the buffer changes */
+ bool need_new_alloc = buffer != alloc.buffer_select;
+
+ /* memory export always in 32/33 pair, new alloc on 32 */
+ if (bc.alu.vector_dest == 32)
+ need_new_alloc = true;
+
+ if (need_new_alloc && exec.count) {
+ num_cf =
+ write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
+ need_alloc = false;
+ }
+
+ need_alloc |= need_new_alloc;
+
+ alloc.size = 0;
+ alloc.buffer_select = buffer;
+
+ if (buffer == SQ_PARAMETER_PIXEL &&
+ ctx->so->type == MESA_SHADER_VERTEX)
+ alloc.size = ctx->f->inputs_count - 1;
+
+ if (buffer == SQ_POSITION)
+ alloc.size = ctx->so->writes_psize;
+ }
+
+ if (is_fetch)
+ exec.serialize |= 0x1 << exec.count * 2;
+ if (need_sync)
+ exec.serialize |= 0x2 << exec.count * 2;
+
+ need_sync = false;
+ exec.count += 1;
+ bytecode[i++] = bc;
+ }
+
+ /* final exec cf */
+ exec.opc = EXEC_END;
+ num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
+
+ /* insert nop to get an even # of CFs */
+ if (num_cf % 2)
+ cfs[num_cf++] = (instr_cf_t){.opc = NOP};
+
+ /* patch cf addrs */
+ for (int idx = 0; idx < num_cf; idx++) {
+ switch (cfs[idx].opc) {
+ case NOP:
+ case ALLOC:
+ break;
+ case EXEC:
+ case EXEC_END:
+ cfs[idx].exec.address += num_cf / 2;
+ break;
+ case COND_JMP:
+ cfs[idx].jmp_call.address = block_addr[cfs[idx].jmp_call.address];
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ /* concatenate cfs and alu/fetch */
+ uint32_t cfdwords = num_cf / 2 * 3;
+ uint32_t alufetchdwords = exec.address * 3;
+ uint32_t sizedwords = cfdwords + alufetchdwords;
+ uint32_t *dwords = malloc(sizedwords * 4);
+ assert(dwords);
+ memcpy(dwords, cfs, cfdwords * 4);
+ memcpy(&dwords[cfdwords], bytecode, alufetchdwords * 4);
+
+ /* finalize ir2_shader_info */
+ ctx->info->dwords = dwords;
+ ctx->info->sizedwords = sizedwords;
+ for (int i = 0; i < ctx->info->num_fetch_instrs; i++)
+ ctx->info->fetch_info[i].offset += cfdwords;
+
+ if (FD_DBG(DISASM)) {
+ DBG("disassemble: type=%d", ctx->so->type);
+ disasm_a2xx(dwords, sizedwords, 0, ctx->so->type);
+ }
}
#include "ir2_private.h"
-static bool is_mov(struct ir2_instr *instr)
+static bool
+is_mov(struct ir2_instr *instr)
{
- return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
- instr->src_count == 1;
+ return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
+ instr->src_count == 1;
}
-static void src_combine(struct ir2_src *src, struct ir2_src b)
+static void
+src_combine(struct ir2_src *src, struct ir2_src b)
{
- src->num = b.num;
- src->type = b.type;
- src->swizzle = swiz_merge(b.swizzle, src->swizzle);
- if (!src->abs) /* if we have abs we don't care about previous negate */
- src->negate ^= b.negate;
- src->abs |= b.abs;
+ src->num = b.num;
+ src->type = b.type;
+ src->swizzle = swiz_merge(b.swizzle, src->swizzle);
+ if (!src->abs) /* if we have abs we don't care about previous negate */
+ src->negate ^= b.negate;
+ src->abs |= b.abs;
}
/* cp_src: replace src regs that refer to a mov instruction with the mov's
 * own source (merging the swizzles), so the user of the mov
 * becomes, e.g.:
 * ALU: MULADDv R7 = C7, R10, R0.xxxx
 */
-void cp_src(struct ir2_context *ctx)
+void
+cp_src(struct ir2_context *ctx)
{
- struct ir2_instr *p;
-
- ir2_foreach_instr(instr, ctx) {
- ir2_foreach_src(src, instr) {
- /* loop to replace recursively */
- do {
- if (src->type != IR2_SRC_SSA)
- break;
-
- p = &ctx->instr[src->num];
- /* don't work across blocks to avoid possible issues */
- if (p->block_idx != instr->block_idx)
- break;
-
- if (!is_mov(p))
- break;
-
- if (p->alu.saturate)
- break;
-
- /* cant apply abs to const src, const src only for alu */
- if (p->src[0].type == IR2_SRC_CONST &&
- (src->abs || instr->type != IR2_ALU))
- break;
-
- src_combine(src, p->src[0]);
- } while (1);
- }
- }
+ struct ir2_instr *p;
+
+ ir2_foreach_instr(instr, ctx)
+ {
+ ir2_foreach_src(src, instr)
+ {
+ /* loop to replace recursively */
+ do {
+ if (src->type != IR2_SRC_SSA)
+ break;
+
+ p = &ctx->instr[src->num];
+ /* don't work across blocks to avoid possible issues */
+ if (p->block_idx != instr->block_idx)
+ break;
+
+ if (!is_mov(p))
+ break;
+
+ if (p->alu.saturate)
+ break;
+
+         /* can't apply abs to a const src; const srcs are only for ALU */
+ if (p->src[0].type == IR2_SRC_CONST &&
+ (src->abs || instr->type != IR2_ALU))
+ break;
+
+ src_combine(src, p->src[0]);
+ } while (1);
+ }
+ }
}
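/* The effect of cp_src() hinges on swizzle composition: a user of the
 * mov's destination is rewritten to read the mov's source, with the two
 * swizzles merged. merge_m() below is an illustrative stand-in for
 * swiz_merge(), reusing the absolute 2-bit packing from the earlier
 * sketch.
 */
#include <assert.h>

static unsigned get_m(unsigned s, unsigned i) { return s >> i * 2 & 3; }

static unsigned merge_m(unsigned a, unsigned b)
{
   unsigned r = 0;
   for (unsigned i = 0; i < 4; i++)
      r |= get_m(a, get_m(b, i)) << i * 2; /* channel i reads a[b[i]] */
   return r;
}

int main(void)
{
   unsigned yzwx = 1 | 2 << 2 | 3 << 4 | 0 << 6;
   unsigned xxxx = 0;
   unsigned yyyy = 1 | 1 << 2 | 1 << 4 | 1 << 6;

   /* mov t = s.yzwx; mul d = t.xxxx, ...  ==>  mul d = s.yyyy, ... */
   assert(merge_m(yzwx, xxxx) == yyyy);
   return 0;
}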
/* cp_export: replace a mov to an export when possible, by retargeting the
 * instructions that produce the value so they export directly, e.g.:
 * ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx?
 */
-void cp_export(struct ir2_context *ctx)
+void
+cp_export(struct ir2_context *ctx)
{
- struct ir2_instr *c[4], *ins[4];
- struct ir2_src *src;
- struct ir2_reg *reg;
- unsigned ncomp;
-
- ir2_foreach_instr(instr, ctx) {
- if (!is_export(instr)) /* TODO */
- continue;
-
- if (!is_mov(instr))
- continue;
-
- src = &instr->src[0];
-
- if (src->negate || src->abs) /* TODO handle these cases */
- continue;
-
- if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
- continue;
-
- reg = get_reg_src(ctx, src);
- ncomp = dst_ncomp(instr);
-
- unsigned reswiz[4] = {};
- unsigned num_instr = 0;
-
- /* fill array c with pointers to instrs that write each component */
- if (src->type == IR2_SRC_SSA) {
- struct ir2_instr *instr = &ctx->instr[src->num];
-
- if (instr->type != IR2_ALU)
- continue;
-
- for (int i = 0; i < ncomp; i++)
- c[i] = instr;
-
- ins[num_instr++] = instr;
- reswiz[0] = src->swizzle;
- } else {
- bool ok = true;
- unsigned write_mask = 0;
-
- ir2_foreach_instr(instr, ctx) {
- if (instr->is_ssa || instr->reg != reg)
- continue;
-
- /* set by non-ALU */
- if (instr->type != IR2_ALU) {
- ok = false;
- break;
- }
-
- /* component written more than once */
- if (write_mask & instr->alu.write_mask) {
- ok = false;
- break;
- }
-
- write_mask |= instr->alu.write_mask;
-
- /* src pointers for components */
- for (int i = 0, j = 0; i < 4; i++) {
- unsigned k = swiz_get(src->swizzle, i);
- if (instr->alu.write_mask & 1 << k) {
- c[i] = instr;
-
- /* reswiz = compressed src->swizzle */
- unsigned x = 0;
- for (int i = 0; i < k; i++)
- x += !!(instr->alu.write_mask & 1 << i);
-
- assert(src->swizzle || x == j);
- reswiz[num_instr] |= swiz_set(x, j++);
- }
- }
- ins[num_instr++] = instr;
- }
- if (!ok)
- continue;
- }
-
- bool redirect = true;
-
- /* must all be in same block */
- for (int i = 0; i < ncomp; i++)
- redirect &= (c[i]->block_idx == instr->block_idx);
-
- /* no other instr using the value */
- ir2_foreach_instr(p, ctx) {
- if (p == instr)
- continue;
- ir2_foreach_src(src, p)
- redirect &= reg != get_reg_src(ctx, src);
- }
-
- if (!redirect)
- continue;
-
- /* redirect the instructions writing to the register */
- for (int i = 0; i < num_instr; i++) {
- struct ir2_instr *p = ins[i];
-
- p->alu.export = instr->alu.export;
- p->alu.write_mask = 0;
- p->is_ssa = true;
- p->ssa.ncomp = 0;
- memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
- p->alu.saturate |= instr->alu.saturate;
-
- switch (p->alu.vector_opc) {
- case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
- case DOT2ADDv:
- case DOT3v:
- case DOT4v:
- case CUBEv:
- continue;
- default:
- break;
- }
- ir2_foreach_src(s, p)
- swiz_merge_p(&s->swizzle, reswiz[i]);
- }
-
- for (int i = 0; i < ncomp; i++) {
- c[i]->alu.write_mask |= (1 << i);
- c[i]->ssa.ncomp++;
- }
- instr->type = IR2_NONE;
- instr->need_emit = false;
- }
+ struct ir2_instr *c[4], *ins[4];
+ struct ir2_src *src;
+ struct ir2_reg *reg;
+ unsigned ncomp;
+
+ ir2_foreach_instr(instr, ctx)
+ {
+ if (!is_export(instr)) /* TODO */
+ continue;
+
+ if (!is_mov(instr))
+ continue;
+
+ src = &instr->src[0];
+
+ if (src->negate || src->abs) /* TODO handle these cases */
+ continue;
+
+ if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
+ continue;
+
+ reg = get_reg_src(ctx, src);
+ ncomp = dst_ncomp(instr);
+
+ unsigned reswiz[4] = {};
+ unsigned num_instr = 0;
+
+ /* fill array c with pointers to instrs that write each component */
+ if (src->type == IR2_SRC_SSA) {
+ struct ir2_instr *instr = &ctx->instr[src->num];
+
+ if (instr->type != IR2_ALU)
+ continue;
+
+ for (int i = 0; i < ncomp; i++)
+ c[i] = instr;
+
+ ins[num_instr++] = instr;
+ reswiz[0] = src->swizzle;
+ } else {
+ bool ok = true;
+ unsigned write_mask = 0;
+
+ ir2_foreach_instr(instr, ctx)
+ {
+ if (instr->is_ssa || instr->reg != reg)
+ continue;
+
+ /* set by non-ALU */
+ if (instr->type != IR2_ALU) {
+ ok = false;
+ break;
+ }
+
+ /* component written more than once */
+ if (write_mask & instr->alu.write_mask) {
+ ok = false;
+ break;
+ }
+
+ write_mask |= instr->alu.write_mask;
+
+ /* src pointers for components */
+ for (int i = 0, j = 0; i < 4; i++) {
+ unsigned k = swiz_get(src->swizzle, i);
+ if (instr->alu.write_mask & 1 << k) {
+ c[i] = instr;
+
+ /* reswiz = compressed src->swizzle */
+ unsigned x = 0;
+ for (int i = 0; i < k; i++)
+ x += !!(instr->alu.write_mask & 1 << i);
+
+ assert(src->swizzle || x == j);
+ reswiz[num_instr] |= swiz_set(x, j++);
+ }
+ }
+ ins[num_instr++] = instr;
+ }
+ if (!ok)
+ continue;
+ }
+
+ bool redirect = true;
+
+ /* must all be in same block */
+ for (int i = 0; i < ncomp; i++)
+ redirect &= (c[i]->block_idx == instr->block_idx);
+
+ /* no other instr using the value */
+ ir2_foreach_instr(p, ctx)
+ {
+ if (p == instr)
+ continue;
+ ir2_foreach_src(src, p) redirect &= reg != get_reg_src(ctx, src);
+ }
+
+ if (!redirect)
+ continue;
+
+ /* redirect the instructions writing to the register */
+ for (int i = 0; i < num_instr; i++) {
+ struct ir2_instr *p = ins[i];
+
+ p->alu.export = instr->alu.export;
+ p->alu.write_mask = 0;
+ p->is_ssa = true;
+ p->ssa.ncomp = 0;
+ memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
+ p->alu.saturate |= instr->alu.saturate;
+
+ switch (p->alu.vector_opc) {
+ case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
+ case DOT2ADDv:
+ case DOT3v:
+ case DOT4v:
+ case CUBEv:
+ continue;
+ default:
+ break;
+ }
+ ir2_foreach_src(s, p) swiz_merge_p(&s->swizzle, reswiz[i]);
+ }
+
+ for (int i = 0; i < ncomp; i++) {
+ c[i]->alu.write_mask |= (1 << i);
+ c[i]->ssa.ncomp++;
+ }
+ instr->type = IR2_NONE;
+ instr->need_emit = false;
+ }
}
#include "ir2_private.h"
-#include "freedreno_util.h"
#include "fd2_program.h"
+#include "freedreno_util.h"
static const nir_shader_compiler_options options = {
- .lower_fpow = true,
- .lower_flrp32 = true,
- .lower_fmod = true,
- .lower_fdiv = true,
- .lower_fceil = true,
- .fuse_ffma16 = true,
- .fuse_ffma32 = true,
- .fuse_ffma64 = true,
- /* .fdot_replicates = true, it is replicated, but it makes things worse */
- .lower_all_io_to_temps = true,
- .vertex_id_zero_based = true, /* its not implemented anyway */
- .lower_bitops = true,
- .lower_rotate = true,
- .lower_vector_cmp = true,
- .lower_fdph = true,
- .has_fsub = true,
- .has_isub = true,
+ .lower_fpow = true,
+ .lower_flrp32 = true,
+ .lower_fmod = true,
+ .lower_fdiv = true,
+ .lower_fceil = true,
+ .fuse_ffma16 = true,
+ .fuse_ffma32 = true,
+ .fuse_ffma64 = true,
+ /* .fdot_replicates = true, it is replicated, but it makes things worse */
+ .lower_all_io_to_temps = true,
+   .vertex_id_zero_based = true, /* it's not implemented anyway */
+ .lower_bitops = true,
+ .lower_rotate = true,
+ .lower_vector_cmp = true,
+ .lower_fdph = true,
+ .has_fsub = true,
+ .has_isub = true,
};
const nir_shader_compiler_options *
ir2_get_compiler_options(void)
{
- return &options;
+ return &options;
}
-#define OPT(nir, pass, ...) ({ \
- bool this_progress = false; \
- NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
- this_progress; \
-})
+#define OPT(nir, pass, ...) \
+ ({ \
+ bool this_progress = false; \
+ NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
+ this_progress; \
+ })
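/* OPT() leans on the GCC/Clang statement-expression extension: a
 * parenthesized block evaluates to its last expression, which is how the
 * macro can both run NIR_PASS() (which reports progress through a named
 * variable rather than a return value) and yield that progress as a
 * value. A minimal standalone illustration, nothing ir2-specific:
 */
static int statement_expr_demo(void)
{
   int x = ({
      int t = 3;
      t * t; /* the block's value */
   });
   return x; /* 9 */
}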
#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
static void
ir2_optimize_loop(nir_shader *s)
{
- bool progress;
- do {
- progress = false;
-
- OPT_V(s, nir_lower_vars_to_ssa);
- progress |= OPT(s, nir_opt_copy_prop_vars);
- progress |= OPT(s, nir_copy_prop);
- progress |= OPT(s, nir_opt_dce);
- progress |= OPT(s, nir_opt_cse);
- /* progress |= OPT(s, nir_opt_gcm, true); */
- progress |= OPT(s, nir_opt_peephole_select, UINT_MAX, true, true);
- progress |= OPT(s, nir_opt_intrinsics);
- progress |= OPT(s, nir_opt_algebraic);
- progress |= OPT(s, nir_opt_constant_folding);
- progress |= OPT(s, nir_opt_dead_cf);
- if (OPT(s, nir_opt_trivial_continues)) {
- progress |= true;
- /* If nir_opt_trivial_continues makes progress, then we need to clean
- * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
- * to make progress.
- */
- OPT(s, nir_copy_prop);
- OPT(s, nir_opt_dce);
- }
- progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
- progress |= OPT(s, nir_opt_if, false);
- progress |= OPT(s, nir_opt_remove_phis);
- progress |= OPT(s, nir_opt_undef);
-
- }
- while (progress);
+ bool progress;
+ do {
+ progress = false;
+
+ OPT_V(s, nir_lower_vars_to_ssa);
+ progress |= OPT(s, nir_opt_copy_prop_vars);
+ progress |= OPT(s, nir_copy_prop);
+ progress |= OPT(s, nir_opt_dce);
+ progress |= OPT(s, nir_opt_cse);
+ /* progress |= OPT(s, nir_opt_gcm, true); */
+ progress |= OPT(s, nir_opt_peephole_select, UINT_MAX, true, true);
+ progress |= OPT(s, nir_opt_intrinsics);
+ progress |= OPT(s, nir_opt_algebraic);
+ progress |= OPT(s, nir_opt_constant_folding);
+ progress |= OPT(s, nir_opt_dead_cf);
+ if (OPT(s, nir_opt_trivial_continues)) {
+ progress |= true;
+ /* If nir_opt_trivial_continues makes progress, then we need to clean
+ * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
+ * to make progress.
+ */
+ OPT(s, nir_copy_prop);
+ OPT(s, nir_opt_dce);
+ }
+ progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+ progress |= OPT(s, nir_opt_if, false);
+ progress |= OPT(s, nir_opt_remove_phis);
+ progress |= OPT(s, nir_opt_undef);
+
+ } while (progress);
}
/* the trig workarounds are the same as ir3's, but we don't want to include ir3 */
-bool ir3_nir_apply_trig_workarounds(nir_shader * shader);
+bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
int
ir2_optimize_nir(nir_shader *s, bool lower)
{
- struct nir_lower_tex_options tex_options = {
- .lower_txp = ~0u,
- .lower_rect = 0,
- };
-
- if (FD_DBG(DISASM)) {
- debug_printf("----------------------\n");
- nir_print_shader(s, stdout);
- debug_printf("----------------------\n");
- }
-
- OPT_V(s, nir_lower_regs_to_ssa);
- OPT_V(s, nir_lower_vars_to_ssa);
- OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
-
- if (lower) {
- OPT_V(s, ir3_nir_apply_trig_workarounds);
- OPT_V(s, nir_lower_tex, &tex_options);
- }
-
- ir2_optimize_loop(s);
-
- OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
- OPT_V(s, nir_opt_sink, nir_move_const_undef);
-
- /* TODO we dont want to get shaders writing to depth for depth textures */
- if (s->info.stage == MESA_SHADER_FRAGMENT) {
- nir_foreach_shader_out_variable(var, s) {
- if (var->data.location == FRAG_RESULT_DEPTH)
- return -1;
- }
- }
-
- return 0;
+ struct nir_lower_tex_options tex_options = {
+ .lower_txp = ~0u,
+ .lower_rect = 0,
+ };
+
+ if (FD_DBG(DISASM)) {
+ debug_printf("----------------------\n");
+ nir_print_shader(s, stdout);
+ debug_printf("----------------------\n");
+ }
+
+ OPT_V(s, nir_lower_regs_to_ssa);
+ OPT_V(s, nir_lower_vars_to_ssa);
+ OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out,
+ UINT32_MAX);
+
+ if (lower) {
+ OPT_V(s, ir3_nir_apply_trig_workarounds);
+ OPT_V(s, nir_lower_tex, &tex_options);
+ }
+
+ ir2_optimize_loop(s);
+
+ OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
+ OPT_V(s, nir_opt_sink, nir_move_const_undef);
+
+   /* TODO we don't want shaders writing to depth for depth textures */
+ if (s->info.stage == MESA_SHADER_FRAGMENT) {
+ nir_foreach_shader_out_variable(var, s)
+ {
+ if (var->data.location == FRAG_RESULT_DEPTH)
+ return -1;
+ }
+ }
+
+ return 0;
}
static struct ir2_src
load_const(struct ir2_context *ctx, float *value_f, unsigned ncomp)
{
- struct fd2_shader_stateobj *so = ctx->so;
- unsigned imm_ncomp, swiz, idx, i, j;
- uint32_t *value = (uint32_t*) value_f;
-
- /* try to merge with existing immediate (TODO: try with neg) */
- for (idx = 0; idx < so->num_immediates; idx++) {
- swiz = 0;
- imm_ncomp = so->immediates[idx].ncomp;
- for (i = 0; i < ncomp; i++) {
- for (j = 0; j < imm_ncomp; j++) {
- if (value[i] == so->immediates[idx].val[j])
- break;
- }
- if (j == imm_ncomp) {
- if (j == 4)
- break;
- so->immediates[idx].val[imm_ncomp++] = value[i];
- }
- swiz |= swiz_set(j, i);
- }
- /* matched all components */
- if (i == ncomp)
- break;
- }
-
- /* need to allocate new immediate */
- if (idx == so->num_immediates) {
- swiz = 0;
- imm_ncomp = 0;
- for (i = 0; i < ncomp; i++) {
- for (j = 0; j < imm_ncomp; j++) {
- if (value[i] == ctx->so->immediates[idx].val[j])
- break;
- }
- if (j == imm_ncomp) {
- so->immediates[idx].val[imm_ncomp++] = value[i];
- }
- swiz |= swiz_set(j, i);
- }
- so->num_immediates++;
- }
- so->immediates[idx].ncomp = imm_ncomp;
-
- if (ncomp == 1)
- swiz = swiz_merge(swiz, IR2_SWIZZLE_XXXX);
-
- return ir2_src(so->first_immediate + idx, swiz, IR2_SRC_CONST);
+ struct fd2_shader_stateobj *so = ctx->so;
+ unsigned imm_ncomp, swiz, idx, i, j;
+ uint32_t *value = (uint32_t *)value_f;
+
+ /* try to merge with existing immediate (TODO: try with neg) */
+ for (idx = 0; idx < so->num_immediates; idx++) {
+ swiz = 0;
+ imm_ncomp = so->immediates[idx].ncomp;
+ for (i = 0; i < ncomp; i++) {
+ for (j = 0; j < imm_ncomp; j++) {
+ if (value[i] == so->immediates[idx].val[j])
+ break;
+ }
+ if (j == imm_ncomp) {
+ if (j == 4)
+ break;
+ so->immediates[idx].val[imm_ncomp++] = value[i];
+ }
+ swiz |= swiz_set(j, i);
+ }
+ /* matched all components */
+ if (i == ncomp)
+ break;
+ }
+
+ /* need to allocate new immediate */
+ if (idx == so->num_immediates) {
+ swiz = 0;
+ imm_ncomp = 0;
+ for (i = 0; i < ncomp; i++) {
+ for (j = 0; j < imm_ncomp; j++) {
+ if (value[i] == ctx->so->immediates[idx].val[j])
+ break;
+ }
+ if (j == imm_ncomp) {
+ so->immediates[idx].val[imm_ncomp++] = value[i];
+ }
+ swiz |= swiz_set(j, i);
+ }
+ so->num_immediates++;
+ }
+ so->immediates[idx].ncomp = imm_ncomp;
+
+ if (ncomp == 1)
+ swiz = swiz_merge(swiz, IR2_SWIZZLE_XXXX);
+
+ return ir2_src(so->first_immediate + idx, swiz, IR2_SRC_CONST);
}
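/* A worked trace of load_const()'s immediate merging, with hypothetical
 * values: immediates live in shared vec4 constant slots, components are
 * deduplicated, and the caller gets back a swizzle into the slot.
 *
 *   state:  immediates[0] = {1.0, 0.0},        ncomp = 2
 *   call:   load_const(ctx, (float[]){0.0, 0.5}, 2)
 *     - 0.0 already sits at component 1  -> swizzle selects .y
 *     - 0.5 is appended at component 2   -> swizzle selects .z
 *   after:  immediates[0] = {1.0, 0.0, 0.5},   ncomp = 3,
 *           returned src = CONST slot 0 with swizzle .yz
 */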
struct ir2_src
ir2_zero(struct ir2_context *ctx)
{
- return load_const(ctx, (float[]) {0.0f}, 1);
+ return load_const(ctx, (float[]){0.0f}, 1);
}
static void
update_range(struct ir2_context *ctx, struct ir2_reg *reg)
{
- if (!reg->initialized) {
- reg->initialized = true;
- reg->loop_depth = ctx->loop_depth;
- }
-
- if (ctx->loop_depth > reg->loop_depth) {
- reg->block_idx_free = ctx->loop_last_block[reg->loop_depth + 1];
- } else {
- reg->loop_depth = ctx->loop_depth;
- reg->block_idx_free = -1;
- }
-
- /* for regs we want to free at the end of the loop in any case
- * XXX dont do this for ssa
- */
- if (reg->loop_depth)
- reg->block_idx_free = ctx->loop_last_block[reg->loop_depth];
+ if (!reg->initialized) {
+ reg->initialized = true;
+ reg->loop_depth = ctx->loop_depth;
+ }
+
+ if (ctx->loop_depth > reg->loop_depth) {
+ reg->block_idx_free = ctx->loop_last_block[reg->loop_depth + 1];
+ } else {
+ reg->loop_depth = ctx->loop_depth;
+ reg->block_idx_free = -1;
+ }
+
+ /* for regs we want to free at the end of the loop in any case
+    * XXX don't do this for SSA
+ */
+ if (reg->loop_depth)
+ reg->block_idx_free = ctx->loop_last_block[reg->loop_depth];
}
static struct ir2_src
make_src(struct ir2_context *ctx, nir_src src)
{
- struct ir2_src res = {};
- struct ir2_reg *reg;
-
- nir_const_value *const_value = nir_src_as_const_value(src);
-
- if (const_value) {
- assert(src.is_ssa);
- float c[src.ssa->num_components];
- nir_const_value_to_array(c, const_value, src.ssa->num_components, f32);
- return load_const(ctx, c, src.ssa->num_components);
- }
-
- if (!src.is_ssa) {
- res.num = src.reg.reg->index;
- res.type = IR2_SRC_REG;
- reg = &ctx->reg[res.num];
- } else {
- assert(ctx->ssa_map[src.ssa->index] >= 0);
- res.num = ctx->ssa_map[src.ssa->index];
- res.type = IR2_SRC_SSA;
- reg = &ctx->instr[res.num].ssa;
- }
-
- update_range(ctx, reg);
- return res;
+ struct ir2_src res = {};
+ struct ir2_reg *reg;
+
+ nir_const_value *const_value = nir_src_as_const_value(src);
+
+ if (const_value) {
+ assert(src.is_ssa);
+ float c[src.ssa->num_components];
+ nir_const_value_to_array(c, const_value, src.ssa->num_components, f32);
+ return load_const(ctx, c, src.ssa->num_components);
+ }
+
+ if (!src.is_ssa) {
+ res.num = src.reg.reg->index;
+ res.type = IR2_SRC_REG;
+ reg = &ctx->reg[res.num];
+ } else {
+ assert(ctx->ssa_map[src.ssa->index] >= 0);
+ res.num = ctx->ssa_map[src.ssa->index];
+ res.type = IR2_SRC_SSA;
+ reg = &ctx->instr[res.num].ssa;
+ }
+
+ update_range(ctx, reg);
+ return res;
}
static void
-set_index(struct ir2_context *ctx, nir_dest * dst,
- struct ir2_instr *instr)
+set_index(struct ir2_context *ctx, nir_dest *dst, struct ir2_instr *instr)
{
- struct ir2_reg *reg = &instr->ssa;
-
- if (dst->is_ssa) {
- ctx->ssa_map[dst->ssa.index] = instr->idx;
- } else {
- assert(instr->is_ssa);
- reg = &ctx->reg[dst->reg.reg->index];
-
- instr->is_ssa = false;
- instr->reg = reg;
- }
- update_range(ctx, reg);
+ struct ir2_reg *reg = &instr->ssa;
+
+ if (dst->is_ssa) {
+ ctx->ssa_map[dst->ssa.index] = instr->idx;
+ } else {
+ assert(instr->is_ssa);
+ reg = &ctx->reg[dst->reg.reg->index];
+
+ instr->is_ssa = false;
+ instr->reg = reg;
+ }
+ update_range(ctx, reg);
}
static struct ir2_instr *
ir2_instr_create(struct ir2_context *ctx, int type)
{
- struct ir2_instr *instr;
-
- instr = &ctx->instr[ctx->instr_count++];
- instr->idx = ctx->instr_count - 1;
- instr->type = type;
- instr->block_idx = ctx->block_idx;
- instr->pred = ctx->pred;
- instr->is_ssa = true;
- return instr;
+ struct ir2_instr *instr;
+
+ instr = &ctx->instr[ctx->instr_count++];
+ instr->idx = ctx->instr_count - 1;
+ instr->type = type;
+ instr->block_idx = ctx->block_idx;
+ instr->pred = ctx->pred;
+ instr->is_ssa = true;
+ return instr;
}
static struct ir2_instr *
instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp)
{
- /* emit_alu will fixup instrs that don't map directly */
- static const struct ir2_opc {
- int8_t scalar, vector;
- } nir_ir2_opc[nir_num_opcodes+1] = {
- [0 ... nir_num_opcodes - 1] = {-1, -1},
-
- [nir_op_mov] = {MAXs, MAXv},
- [nir_op_fneg] = {MAXs, MAXv},
- [nir_op_fabs] = {MAXs, MAXv},
- [nir_op_fsat] = {MAXs, MAXv},
- [nir_op_fsign] = {-1, CNDGTEv},
- [nir_op_fadd] = {ADDs, ADDv},
- [nir_op_fsub] = {ADDs, ADDv},
- [nir_op_fmul] = {MULs, MULv},
- [nir_op_ffma] = {-1, MULADDv},
- [nir_op_fmax] = {MAXs, MAXv},
- [nir_op_fmin] = {MINs, MINv},
- [nir_op_ffloor] = {FLOORs, FLOORv},
- [nir_op_ffract] = {FRACs, FRACv},
- [nir_op_ftrunc] = {TRUNCs, TRUNCv},
- [nir_op_fdot2] = {-1, DOT2ADDv},
- [nir_op_fdot3] = {-1, DOT3v},
- [nir_op_fdot4] = {-1, DOT4v},
- [nir_op_sge] = {-1, SETGTEv},
- [nir_op_slt] = {-1, SETGTv},
- [nir_op_sne] = {-1, SETNEv},
- [nir_op_seq] = {-1, SETEv},
- [nir_op_fcsel] = {-1, CNDEv},
- [nir_op_frsq] = {RECIPSQ_IEEE, -1},
- [nir_op_frcp] = {RECIP_IEEE, -1},
- [nir_op_flog2] = {LOG_IEEE, -1},
- [nir_op_fexp2] = {EXP_IEEE, -1},
- [nir_op_fsqrt] = {SQRT_IEEE, -1},
- [nir_op_fcos] = {COS, -1},
- [nir_op_fsin] = {SIN, -1},
- /* no fsat, fneg, fabs since source mods deal with those */
-
- /* so we can use this function with non-nir op */
+ /* emit_alu will fixup instrs that don't map directly */
+ static const struct ir2_opc {
+ int8_t scalar, vector;
+ } nir_ir2_opc[nir_num_opcodes + 1] = {
+ [0 ... nir_num_opcodes - 1] = {-1, -1},
+
+ [nir_op_mov] = {MAXs, MAXv},
+ [nir_op_fneg] = {MAXs, MAXv},
+ [nir_op_fabs] = {MAXs, MAXv},
+ [nir_op_fsat] = {MAXs, MAXv},
+ [nir_op_fsign] = {-1, CNDGTEv},
+ [nir_op_fadd] = {ADDs, ADDv},
+ [nir_op_fsub] = {ADDs, ADDv},
+ [nir_op_fmul] = {MULs, MULv},
+ [nir_op_ffma] = {-1, MULADDv},
+ [nir_op_fmax] = {MAXs, MAXv},
+ [nir_op_fmin] = {MINs, MINv},
+ [nir_op_ffloor] = {FLOORs, FLOORv},
+ [nir_op_ffract] = {FRACs, FRACv},
+ [nir_op_ftrunc] = {TRUNCs, TRUNCv},
+ [nir_op_fdot2] = {-1, DOT2ADDv},
+ [nir_op_fdot3] = {-1, DOT3v},
+ [nir_op_fdot4] = {-1, DOT4v},
+ [nir_op_sge] = {-1, SETGTEv},
+ [nir_op_slt] = {-1, SETGTv},
+ [nir_op_sne] = {-1, SETNEv},
+ [nir_op_seq] = {-1, SETEv},
+ [nir_op_fcsel] = {-1, CNDEv},
+ [nir_op_frsq] = {RECIPSQ_IEEE, -1},
+ [nir_op_frcp] = {RECIP_IEEE, -1},
+ [nir_op_flog2] = {LOG_IEEE, -1},
+ [nir_op_fexp2] = {EXP_IEEE, -1},
+ [nir_op_fsqrt] = {SQRT_IEEE, -1},
+ [nir_op_fcos] = {COS, -1},
+ [nir_op_fsin] = {SIN, -1},
+ /* no fsat, fneg, fabs since source mods deal with those */
+
+ /* so we can use this function with non-nir op */
#define ir2_op_cube nir_num_opcodes
- [ir2_op_cube] = {-1, CUBEv},
- };
-
- struct ir2_opc op = nir_ir2_opc[opcode];
- assert(op.vector >= 0 || op.scalar >= 0);
-
- struct ir2_instr *instr = ir2_instr_create(ctx, IR2_ALU);
- instr->alu.vector_opc = op.vector;
- instr->alu.scalar_opc = op.scalar;
- instr->alu.export = -1;
- instr->alu.write_mask = (1 << ncomp) - 1;
- instr->src_count = opcode == ir2_op_cube ? 2 :
- nir_op_infos[opcode].num_inputs;
- instr->ssa.ncomp = ncomp;
- return instr;
+ [ir2_op_cube] = {-1, CUBEv},
+ };
+
+ struct ir2_opc op = nir_ir2_opc[opcode];
+ assert(op.vector >= 0 || op.scalar >= 0);
+
+ struct ir2_instr *instr = ir2_instr_create(ctx, IR2_ALU);
+ instr->alu.vector_opc = op.vector;
+ instr->alu.scalar_opc = op.scalar;
+ instr->alu.export = -1;
+ instr->alu.write_mask = (1 << ncomp) - 1;
+ instr->src_count =
+ opcode == ir2_op_cube ? 2 : nir_op_infos[opcode].num_inputs;
+ instr->ssa.ncomp = ncomp;
+ return instr;
}
static struct ir2_instr *
-instr_create_alu_reg(struct ir2_context *ctx, nir_op opcode,
- uint8_t write_mask, struct ir2_instr *share_reg)
+instr_create_alu_reg(struct ir2_context *ctx, nir_op opcode, uint8_t write_mask,
+ struct ir2_instr *share_reg)
{
- struct ir2_instr *instr;
- struct ir2_reg *reg;
+ struct ir2_instr *instr;
+ struct ir2_reg *reg;
- reg = share_reg ? share_reg->reg : &ctx->reg[ctx->reg_count++];
- reg->ncomp = MAX2(reg->ncomp, util_logbase2(write_mask) + 1);
+ reg = share_reg ? share_reg->reg : &ctx->reg[ctx->reg_count++];
+ reg->ncomp = MAX2(reg->ncomp, util_logbase2(write_mask) + 1);
- instr = instr_create_alu(ctx, opcode, util_bitcount(write_mask));
- instr->alu.write_mask = write_mask;
- instr->reg = reg;
- instr->is_ssa = false;
- return instr;
+ instr = instr_create_alu(ctx, opcode, util_bitcount(write_mask));
+ instr->alu.write_mask = write_mask;
+ instr->reg = reg;
+ instr->is_ssa = false;
+ return instr;
}
-
static struct ir2_instr *
instr_create_alu_dest(struct ir2_context *ctx, nir_op opcode, nir_dest *dst)
{
- struct ir2_instr *instr;
- instr = instr_create_alu(ctx, opcode, nir_dest_num_components(*dst));
- set_index(ctx, dst, instr);
- return instr;
+ struct ir2_instr *instr;
+ instr = instr_create_alu(ctx, opcode, nir_dest_num_components(*dst));
+ set_index(ctx, dst, instr);
+ return instr;
}
static struct ir2_instr *
ir2_instr_create_fetch(struct ir2_context *ctx, nir_dest *dst,
- instr_fetch_opc_t opc)
+ instr_fetch_opc_t opc)
{
- struct ir2_instr *instr = ir2_instr_create(ctx, IR2_FETCH);
- instr->fetch.opc = opc;
- instr->src_count = 1;
- instr->ssa.ncomp = nir_dest_num_components(*dst);
- set_index(ctx, dst, instr);
- return instr;
+ struct ir2_instr *instr = ir2_instr_create(ctx, IR2_FETCH);
+ instr->fetch.opc = opc;
+ instr->src_count = 1;
+ instr->ssa.ncomp = nir_dest_num_components(*dst);
+ set_index(ctx, dst, instr);
+ return instr;
}
static struct ir2_src
make_src_noconst(struct ir2_context *ctx, nir_src src)
{
- struct ir2_instr *instr;
+ struct ir2_instr *instr;
- if (nir_src_as_const_value(src)) {
- assert(src.is_ssa);
- instr = instr_create_alu(ctx, nir_op_mov, src.ssa->num_components);
- instr->src[0] = make_src(ctx, src);
- return ir2_src(instr->idx, 0, IR2_SRC_SSA);
- }
+ if (nir_src_as_const_value(src)) {
+ assert(src.is_ssa);
+ instr = instr_create_alu(ctx, nir_op_mov, src.ssa->num_components);
+ instr->src[0] = make_src(ctx, src);
+ return ir2_src(instr->idx, 0, IR2_SRC_SSA);
+ }
- return make_src(ctx, src);
+ return make_src(ctx, src);
}
static void
-emit_alu(struct ir2_context *ctx, nir_alu_instr * alu)
+emit_alu(struct ir2_context *ctx, nir_alu_instr *alu)
{
- const nir_op_info *info = &nir_op_infos[alu->op];
- nir_dest *dst = &alu->dest.dest;
- struct ir2_instr *instr;
- struct ir2_src tmp;
- unsigned ncomp;
-
- /* get the number of dst components */
- if (dst->is_ssa) {
- ncomp = dst->ssa.num_components;
- } else {
- ncomp = 0;
- for (int i = 0; i < 4; i++)
- ncomp += !!(alu->dest.write_mask & 1 << i);
- }
-
- instr = instr_create_alu(ctx, alu->op, ncomp);
- set_index(ctx, dst, instr);
- instr->alu.saturate = alu->dest.saturate;
- instr->alu.write_mask = alu->dest.write_mask;
-
- for (int i = 0; i < info->num_inputs; i++) {
- nir_alu_src *src = &alu->src[i];
-
- /* compress swizzle with writemask when applicable */
- unsigned swiz = 0, j = 0;
- for (int i = 0; i < 4; i++) {
- if (!(alu->dest.write_mask & 1 << i) && !info->output_size)
- continue;
- swiz |= swiz_set(src->swizzle[i], j++);
- }
-
- instr->src[i] = make_src(ctx, src->src);
- instr->src[i].swizzle = swiz_merge(instr->src[i].swizzle, swiz);
- instr->src[i].negate = src->negate;
- instr->src[i].abs = src->abs;
- }
-
- /* workarounds for NIR ops that don't map directly to a2xx ops */
- switch (alu->op) {
- case nir_op_fneg:
- instr->src[0].negate = 1;
- break;
- case nir_op_fabs:
- instr->src[0].abs = 1;
- break;
- case nir_op_fsat:
- instr->alu.saturate = 1;
- break;
- case nir_op_slt:
- tmp = instr->src[0];
- instr->src[0] = instr->src[1];
- instr->src[1] = tmp;
- break;
- case nir_op_fcsel:
- tmp = instr->src[1];
- instr->src[1] = instr->src[2];
- instr->src[2] = tmp;
- break;
- case nir_op_fsub:
- instr->src[1].negate = !instr->src[1].negate;
- break;
- case nir_op_fdot2:
- instr->src_count = 3;
- instr->src[2] = ir2_zero(ctx);
- break;
- case nir_op_fsign: {
- /* we need an extra instruction to deal with the zero case */
- struct ir2_instr *tmp;
-
- /* tmp = x == 0 ? 0 : 1 */
- tmp = instr_create_alu(ctx, nir_op_fcsel, ncomp);
- tmp->src[0] = instr->src[0];
- tmp->src[1] = ir2_zero(ctx);
- tmp->src[2] = load_const(ctx, (float[]) {1.0f}, 1);
-
- /* result = x >= 0 ? tmp : -tmp */
- instr->src[1] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
- instr->src[2] = instr->src[1];
- instr->src[2].negate = true;
- instr->src_count = 3;
- } break;
- default:
- break;
- }
+ const nir_op_info *info = &nir_op_infos[alu->op];
+ nir_dest *dst = &alu->dest.dest;
+ struct ir2_instr *instr;
+ struct ir2_src tmp;
+ unsigned ncomp;
+
+ /* get the number of dst components */
+ if (dst->is_ssa) {
+ ncomp = dst->ssa.num_components;
+ } else {
+ ncomp = 0;
+ for (int i = 0; i < 4; i++)
+ ncomp += !!(alu->dest.write_mask & 1 << i);
+ }
+
+ instr = instr_create_alu(ctx, alu->op, ncomp);
+ set_index(ctx, dst, instr);
+ instr->alu.saturate = alu->dest.saturate;
+ instr->alu.write_mask = alu->dest.write_mask;
+
+ for (int i = 0; i < info->num_inputs; i++) {
+ nir_alu_src *src = &alu->src[i];
+
+ /* compress swizzle with writemask when applicable */
+ unsigned swiz = 0, j = 0;
+ for (int i = 0; i < 4; i++) {
+ if (!(alu->dest.write_mask & 1 << i) && !info->output_size)
+ continue;
+ swiz |= swiz_set(src->swizzle[i], j++);
+ }
+
+ instr->src[i] = make_src(ctx, src->src);
+ instr->src[i].swizzle = swiz_merge(instr->src[i].swizzle, swiz);
+ instr->src[i].negate = src->negate;
+ instr->src[i].abs = src->abs;
+ }
+
+ /* workarounds for NIR ops that don't map directly to a2xx ops */
+ switch (alu->op) {
+ case nir_op_fneg:
+ instr->src[0].negate = 1;
+ break;
+ case nir_op_fabs:
+ instr->src[0].abs = 1;
+ break;
+ case nir_op_fsat:
+ instr->alu.saturate = 1;
+ break;
+ case nir_op_slt:
+ tmp = instr->src[0];
+ instr->src[0] = instr->src[1];
+ instr->src[1] = tmp;
+ break;
+ case nir_op_fcsel:
+ tmp = instr->src[1];
+ instr->src[1] = instr->src[2];
+ instr->src[2] = tmp;
+ break;
+ case nir_op_fsub:
+ instr->src[1].negate = !instr->src[1].negate;
+ break;
+ case nir_op_fdot2:
+ instr->src_count = 3;
+ instr->src[2] = ir2_zero(ctx);
+ break;
+ case nir_op_fsign: {
+ /* we need an extra instruction to deal with the zero case */
+ struct ir2_instr *tmp;
+
+ /* tmp = x == 0 ? 0 : 1 */
+ tmp = instr_create_alu(ctx, nir_op_fcsel, ncomp);
+ tmp->src[0] = instr->src[0];
+ tmp->src[1] = ir2_zero(ctx);
+ tmp->src[2] = load_const(ctx, (float[]){1.0f}, 1);
+
+ /* result = x >= 0 ? tmp : -tmp */
+ instr->src[1] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
+ instr->src[2] = instr->src[1];
+ instr->src[2].negate = true;
+ instr->src_count = 3;
+ } break;
+ default:
+ break;
+ }
}
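A worked example of the swizzle compression above, with hypothetical mask and
swizzle values (the helpers are swiz_set()/swiz_get() from ir2.h):

   /* write_mask = 0x5 (.xz written), src->swizzle = {Y, _, W, _} */
   unsigned swiz = 0, j = 0;
   swiz |= swiz_set(1, j++); /* Y lands in channel 0: ((1 - 0) & 3) << 0 = 0x1 */
   swiz |= swiz_set(3, j++); /* W repacks to channel 1: ((3 - 1) & 3) << 2 = 0x8 */
   /* swiz == 0x9: swiz_get(0x9, 0) == 1 (Y), swiz_get(0x9, 1) == 3 (W) */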
static void
load_input(struct ir2_context *ctx, nir_dest *dst, unsigned idx)
{
- struct ir2_instr *instr;
- int slot = -1;
-
- if (ctx->so->type == MESA_SHADER_VERTEX) {
- instr = ir2_instr_create_fetch(ctx, dst, 0);
- instr->src[0] = ir2_src(0, 0, IR2_SRC_INPUT);
- instr->fetch.vtx.const_idx = 20 + (idx / 3);
- instr->fetch.vtx.const_idx_sel = idx % 3;
- return;
- }
-
- /* get slot from idx */
- nir_foreach_shader_in_variable(var, ctx->nir) {
- if (var->data.driver_location == idx) {
- slot = var->data.location;
- break;
- }
- }
- assert(slot >= 0);
-
- switch (slot) {
- case VARYING_SLOT_POS:
- /* need to extract xy with abs and add tile offset on a20x
- * zw from fragcoord input (w inverted in fragment shader)
- * TODO: only components that are required by fragment shader
- */
- instr = instr_create_alu_reg(ctx,
- ctx->so->is_a20x ? nir_op_fadd : nir_op_mov, 3, NULL);
- instr->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);
- instr->src[0].abs = true;
- /* on a20x, C64 contains the tile offset */
- instr->src[1] = ir2_src(64, 0, IR2_SRC_CONST);
-
- instr = instr_create_alu_reg(ctx, nir_op_mov, 4, instr);
- instr->src[0] = ir2_src(ctx->f->fragcoord, 0, IR2_SRC_INPUT);
-
- instr = instr_create_alu_reg(ctx, nir_op_frcp, 8, instr);
- instr->src[0] = ir2_src(ctx->f->fragcoord, IR2_SWIZZLE_Y, IR2_SRC_INPUT);
-
- unsigned reg_idx = instr->reg - ctx->reg; /* XXX */
- instr = instr_create_alu_dest(ctx, nir_op_mov, dst);
- instr->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
- break;
- default:
- instr = instr_create_alu_dest(ctx, nir_op_mov, dst);
- instr->src[0] = ir2_src(idx, 0, IR2_SRC_INPUT);
- break;
- }
+ struct ir2_instr *instr;
+ int slot = -1;
+
+ if (ctx->so->type == MESA_SHADER_VERTEX) {
+ instr = ir2_instr_create_fetch(ctx, dst, 0);
+ instr->src[0] = ir2_src(0, 0, IR2_SRC_INPUT);
+ instr->fetch.vtx.const_idx = 20 + (idx / 3);
+ instr->fetch.vtx.const_idx_sel = idx % 3;
+ return;
+ }
+
+ /* get slot from idx */
+ nir_foreach_shader_in_variable (var, ctx->nir) {
+ if (var->data.driver_location == idx) {
+ slot = var->data.location;
+ break;
+ }
+ }
+ assert(slot >= 0);
+
+ switch (slot) {
+ case VARYING_SLOT_POS:
+ /* need to extract xy with abs and add tile offset on a20x
+ * zw from fragcoord input (w inverted in fragment shader)
+ * TODO: only components that are required by fragment shader
+ */
+ instr = instr_create_alu_reg(
+ ctx, ctx->so->is_a20x ? nir_op_fadd : nir_op_mov, 3, NULL);
+ instr->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);
+ instr->src[0].abs = true;
+ /* on a20x, C64 contains the tile offset */
+ instr->src[1] = ir2_src(64, 0, IR2_SRC_CONST);
+
+ instr = instr_create_alu_reg(ctx, nir_op_mov, 4, instr);
+ instr->src[0] = ir2_src(ctx->f->fragcoord, 0, IR2_SRC_INPUT);
+
+ instr = instr_create_alu_reg(ctx, nir_op_frcp, 8, instr);
+ instr->src[0] = ir2_src(ctx->f->fragcoord, IR2_SWIZZLE_Y, IR2_SRC_INPUT);
+
+ unsigned reg_idx = instr->reg - ctx->reg; /* XXX */
+ instr = instr_create_alu_dest(ctx, nir_op_mov, dst);
+ instr->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
+ break;
+ default:
+ instr = instr_create_alu_dest(ctx, nir_op_mov, dst);
+ instr->src[0] = ir2_src(idx, 0, IR2_SRC_INPUT);
+ break;
+ }
}
static unsigned
output_slot(struct ir2_context *ctx, nir_intrinsic_instr *intr)
{
- int slot = -1;
- unsigned idx = nir_intrinsic_base(intr);
- nir_foreach_shader_out_variable(var, ctx->nir) {
- if (var->data.driver_location == idx) {
- slot = var->data.location;
- break;
- }
- }
- assert(slot != -1);
- return slot;
+ int slot = -1;
+ unsigned idx = nir_intrinsic_base(intr);
+ nir_foreach_shader_out_variable(var, ctx->nir)
+ {
+ if (var->data.driver_location == idx) {
+ slot = var->data.location;
+ break;
+ }
+ }
+ assert(slot != -1);
+ return slot;
}
static void
-store_output(struct ir2_context *ctx, nir_src src, unsigned slot, unsigned ncomp)
+store_output(struct ir2_context *ctx, nir_src src, unsigned slot,
+ unsigned ncomp)
{
- struct ir2_instr *instr;
- unsigned idx = 0;
-
- if (ctx->so->type == MESA_SHADER_VERTEX) {
- switch (slot) {
- case VARYING_SLOT_POS:
- ctx->position = make_src(ctx, src);
- idx = 62;
- break;
- case VARYING_SLOT_PSIZ:
- ctx->so->writes_psize = true;
- idx = 63;
- break;
- default:
- /* find matching slot from fragment shader input */
- for (idx = 0; idx < ctx->f->inputs_count; idx++)
- if (ctx->f->inputs[idx].slot == slot)
- break;
- if (idx == ctx->f->inputs_count)
- return;
- }
- } else if (slot != FRAG_RESULT_COLOR && slot != FRAG_RESULT_DATA0) {
- /* only color output is implemented */
- return;
- }
-
- instr = instr_create_alu(ctx, nir_op_mov, ncomp);
- instr->src[0] = make_src(ctx, src);
- instr->alu.export = idx;
+ struct ir2_instr *instr;
+ unsigned idx = 0;
+
+ if (ctx->so->type == MESA_SHADER_VERTEX) {
+ switch (slot) {
+ case VARYING_SLOT_POS:
+ ctx->position = make_src(ctx, src);
+ idx = 62;
+ break;
+ case VARYING_SLOT_PSIZ:
+ ctx->so->writes_psize = true;
+ idx = 63;
+ break;
+ default:
+ /* find matching slot from fragment shader input */
+ for (idx = 0; idx < ctx->f->inputs_count; idx++)
+ if (ctx->f->inputs[idx].slot == slot)
+ break;
+ if (idx == ctx->f->inputs_count)
+ return;
+ }
+ } else if (slot != FRAG_RESULT_COLOR && slot != FRAG_RESULT_DATA0) {
+ /* only color output is implemented */
+ return;
+ }
+
+ instr = instr_create_alu(ctx, nir_op_mov, ncomp);
+ instr->src[0] = make_src(ctx, src);
+ instr->alu.export = idx;
}
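For cross-reference, the hard-coded export indices used here line up with
export_buf() in ir2.h below: exports under 32 are SQ_PARAMETER_PIXEL varyings,
62/63 (position/psize) map to SQ_POSITION, and the 32..61 range goes to
SQ_MEMORY, which is what the a20x binning exports in extra_position_exports()
rely on.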
static void
emit_intrinsic(struct ir2_context *ctx, nir_intrinsic_instr *intr)
{
- struct ir2_instr *instr;
- ASSERTED nir_const_value *const_offset;
- unsigned idx;
-
- switch (intr->intrinsic) {
- case nir_intrinsic_load_input:
- load_input(ctx, &intr->dest, nir_intrinsic_base(intr));
- break;
- case nir_intrinsic_store_output:
- store_output(ctx, intr->src[0], output_slot(ctx, intr), intr->num_components);
- break;
- case nir_intrinsic_load_uniform:
- const_offset = nir_src_as_const_value(intr->src[0]);
- assert(const_offset); /* TODO can be false in ES2? */
- idx = nir_intrinsic_base(intr);
- idx += (uint32_t)const_offset[0].f32;
- instr = instr_create_alu_dest(ctx, nir_op_mov, &intr->dest);
- instr->src[0] = ir2_src(idx, 0, IR2_SRC_CONST);
- break;
- case nir_intrinsic_discard:
- case nir_intrinsic_discard_if:
- instr = ir2_instr_create(ctx, IR2_ALU);
- instr->alu.vector_opc = VECTOR_NONE;
- if (intr->intrinsic == nir_intrinsic_discard_if) {
- instr->alu.scalar_opc = KILLNEs;
- instr->src[0] = make_src(ctx, intr->src[0]);
- } else {
- instr->alu.scalar_opc = KILLEs;
- instr->src[0] = ir2_zero(ctx);
- }
- instr->alu.export = -1;
- instr->src_count = 1;
- ctx->so->has_kill = true;
- break;
- case nir_intrinsic_load_front_face:
- /* gl_FrontFacing is in the sign of param.x
- * rcp required because otherwise we can't differentiate -0.0 and +0.0
- */
- ctx->so->need_param = true;
-
- struct ir2_instr *tmp = instr_create_alu(ctx, nir_op_frcp, 1);
- tmp->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);
-
- instr = instr_create_alu_dest(ctx, nir_op_sge, &intr->dest);
- instr->src[0] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
- instr->src[1] = ir2_zero(ctx);
- break;
- case nir_intrinsic_load_point_coord:
- /* param.zw (note: abs might be needed like fragcoord in param.xy?) */
- ctx->so->need_param = true;
-
- instr = instr_create_alu_dest(ctx, nir_op_mov, &intr->dest);
- instr->src[0] = ir2_src(ctx->f->inputs_count, IR2_SWIZZLE_ZW, IR2_SRC_INPUT);
- break;
- default:
- compile_error(ctx, "unimplemented intr %d\n", intr->intrinsic);
- break;
- }
+ struct ir2_instr *instr;
+ ASSERTED nir_const_value *const_offset;
+ unsigned idx;
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_input:
+ load_input(ctx, &intr->dest, nir_intrinsic_base(intr));
+ break;
+ case nir_intrinsic_store_output:
+ store_output(ctx, intr->src[0], output_slot(ctx, intr),
+ intr->num_components);
+ break;
+ case nir_intrinsic_load_uniform:
+ const_offset = nir_src_as_const_value(intr->src[0]);
+ assert(const_offset); /* TODO can be false in ES2? */
+ idx = nir_intrinsic_base(intr);
+ idx += (uint32_t)const_offset[0].f32;
+ instr = instr_create_alu_dest(ctx, nir_op_mov, &intr->dest);
+ instr->src[0] = ir2_src(idx, 0, IR2_SRC_CONST);
+ break;
+ case nir_intrinsic_discard:
+ case nir_intrinsic_discard_if:
+ instr = ir2_instr_create(ctx, IR2_ALU);
+ instr->alu.vector_opc = VECTOR_NONE;
+ if (intr->intrinsic == nir_intrinsic_discard_if) {
+ instr->alu.scalar_opc = KILLNEs;
+ instr->src[0] = make_src(ctx, intr->src[0]);
+ } else {
+ instr->alu.scalar_opc = KILLEs;
+ instr->src[0] = ir2_zero(ctx);
+ }
+ instr->alu.export = -1;
+ instr->src_count = 1;
+ ctx->so->has_kill = true;
+ break;
+ case nir_intrinsic_load_front_face:
+ /* gl_FrontFacing is in the sign of param.x
+ * rcp required because otherwise we can't differentiate -0.0 and +0.0
+ */
+ ctx->so->need_param = true;
+
+ struct ir2_instr *tmp = instr_create_alu(ctx, nir_op_frcp, 1);
+ tmp->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);
+
+ instr = instr_create_alu_dest(ctx, nir_op_sge, &intr->dest);
+ instr->src[0] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
+ instr->src[1] = ir2_zero(ctx);
+ break;
+ case nir_intrinsic_load_point_coord:
+ /* param.zw (note: abs might be needed like fragcoord in param.xy?) */
+ ctx->so->need_param = true;
+
+ instr = instr_create_alu_dest(ctx, nir_op_mov, &intr->dest);
+ instr->src[0] =
+ ir2_src(ctx->f->inputs_count, IR2_SWIZZLE_ZW, IR2_SRC_INPUT);
+ break;
+ default:
+ compile_error(ctx, "unimplemented intr %d\n", intr->intrinsic);
+ break;
+ }
}
static void
-emit_tex(struct ir2_context *ctx, nir_tex_instr * tex)
+emit_tex(struct ir2_context *ctx, nir_tex_instr *tex)
{
- bool is_rect = false, is_cube = false;
- struct ir2_instr *instr;
- nir_src *coord, *lod_bias;
-
- coord = lod_bias = NULL;
-
- for (unsigned i = 0; i < tex->num_srcs; i++) {
- switch (tex->src[i].src_type) {
- case nir_tex_src_coord:
- coord = &tex->src[i].src;
- break;
- case nir_tex_src_bias:
- case nir_tex_src_lod:
- assert(!lod_bias);
- lod_bias = &tex->src[i].src;
- break;
- default:
- compile_error(ctx, "Unhandled NIR tex src type: %d\n",
- tex->src[i].src_type);
- return;
- }
- }
-
- switch (tex->op) {
- case nir_texop_tex:
- case nir_texop_txb:
- case nir_texop_txl:
- break;
- default:
- compile_error(ctx, "unimplemented texop %d\n", tex->op);
- return;
- }
-
- switch (tex->sampler_dim) {
- case GLSL_SAMPLER_DIM_2D:
- break;
- case GLSL_SAMPLER_DIM_RECT:
- is_rect = true;
- break;
- case GLSL_SAMPLER_DIM_CUBE:
- is_cube = true;
- break;
- default:
- compile_error(ctx, "unimplemented sampler %d\n", tex->sampler_dim);
- return;
- }
-
- struct ir2_src src_coord = make_src_noconst(ctx, *coord);
-
- /* for cube maps
- * tmp = cube(coord)
- * tmp.xy = tmp.xy / |tmp.z| + 1.5
- * coord = tmp.xyw
- */
- if (is_cube) {
- struct ir2_instr *rcp, *coord_xy;
- unsigned reg_idx;
-
- instr = instr_create_alu_reg(ctx, ir2_op_cube, 15, NULL);
- instr->src[0] = src_coord;
- instr->src[0].swizzle = IR2_SWIZZLE_ZZXY;
- instr->src[1] = src_coord;
- instr->src[1].swizzle = IR2_SWIZZLE_YXZZ;
-
- reg_idx = instr->reg - ctx->reg; /* hacky */
-
- rcp = instr_create_alu(ctx, nir_op_frcp, 1);
- rcp->src[0] = ir2_src(reg_idx, IR2_SWIZZLE_Z, IR2_SRC_REG);
- rcp->src[0].abs = true;
-
- coord_xy = instr_create_alu_reg(ctx, nir_op_ffma, 3, instr);
- coord_xy->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
- coord_xy->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
- coord_xy->src[2] = load_const(ctx, (float[]) {1.5f}, 1);
-
- src_coord = ir2_src(reg_idx, 0, IR2_SRC_REG);
- /* TODO: lod/bias transformed by src_coord.z ? */
- }
-
- instr = ir2_instr_create_fetch(ctx, &tex->dest, TEX_FETCH);
- instr->src[0] = src_coord;
- instr->src[0].swizzle = is_cube ? IR2_SWIZZLE_YXW : 0;
- instr->fetch.tex.is_cube = is_cube;
- instr->fetch.tex.is_rect = is_rect;
- instr->fetch.tex.samp_id = tex->sampler_index;
-
- /* for lod/bias, we insert an extra src for the backend to deal with */
- if (lod_bias) {
- instr->src[1] = make_src_noconst(ctx, *lod_bias);
- /* backend will use 2-3 components so apply swizzle */
- swiz_merge_p(&instr->src[1].swizzle, IR2_SWIZZLE_XXXX);
- instr->src_count = 2;
- }
+ bool is_rect = false, is_cube = false;
+ struct ir2_instr *instr;
+ nir_src *coord, *lod_bias;
+
+ coord = lod_bias = NULL;
+
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ switch (tex->src[i].src_type) {
+ case nir_tex_src_coord:
+ coord = &tex->src[i].src;
+ break;
+ case nir_tex_src_bias:
+ case nir_tex_src_lod:
+ assert(!lod_bias);
+ lod_bias = &tex->src[i].src;
+ break;
+ default:
+ compile_error(ctx, "Unhandled NIR tex src type: %d\n",
+ tex->src[i].src_type);
+ return;
+ }
+ }
+
+ switch (tex->op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_txl:
+ break;
+ default:
+ compile_error(ctx, "unimplemented texop %d\n", tex->op);
+ return;
+ }
+
+ switch (tex->sampler_dim) {
+ case GLSL_SAMPLER_DIM_2D:
+ break;
+ case GLSL_SAMPLER_DIM_RECT:
+ is_rect = true;
+ break;
+ case GLSL_SAMPLER_DIM_CUBE:
+ is_cube = true;
+ break;
+ default:
+ compile_error(ctx, "unimplemented sampler %d\n", tex->sampler_dim);
+ return;
+ }
+
+ struct ir2_src src_coord = make_src_noconst(ctx, *coord);
+
+ /* for cube maps
+ * tmp = cube(coord)
+ * tmp.xy = tmp.xy / |tmp.z| + 1.5
+ * coord = tmp.xyw
+ */
+ if (is_cube) {
+ struct ir2_instr *rcp, *coord_xy;
+ unsigned reg_idx;
+
+ instr = instr_create_alu_reg(ctx, ir2_op_cube, 15, NULL);
+ instr->src[0] = src_coord;
+ instr->src[0].swizzle = IR2_SWIZZLE_ZZXY;
+ instr->src[1] = src_coord;
+ instr->src[1].swizzle = IR2_SWIZZLE_YXZZ;
+
+ reg_idx = instr->reg - ctx->reg; /* hacky */
+
+ rcp = instr_create_alu(ctx, nir_op_frcp, 1);
+ rcp->src[0] = ir2_src(reg_idx, IR2_SWIZZLE_Z, IR2_SRC_REG);
+ rcp->src[0].abs = true;
+
+ coord_xy = instr_create_alu_reg(ctx, nir_op_ffma, 3, instr);
+ coord_xy->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
+ coord_xy->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
+ coord_xy->src[2] = load_const(ctx, (float[]){1.5f}, 1);
+
+ src_coord = ir2_src(reg_idx, 0, IR2_SRC_REG);
+ /* TODO: lod/bias transformed by src_coord.z ? */
+ }
+
+ instr = ir2_instr_create_fetch(ctx, &tex->dest, TEX_FETCH);
+ instr->src[0] = src_coord;
+ instr->src[0].swizzle = is_cube ? IR2_SWIZZLE_YXW : 0;
+ instr->fetch.tex.is_cube = is_cube;
+ instr->fetch.tex.is_rect = is_rect;
+ instr->fetch.tex.samp_id = tex->sampler_index;
+
+ /* for lod/bias, we insert an extra src for the backend to deal with */
+ if (lod_bias) {
+ instr->src[1] = make_src_noconst(ctx, *lod_bias);
+ /* backend will use 2-3 components so apply swizzle */
+ swiz_merge_p(&instr->src[1].swizzle, IR2_SWIZZLE_XXXX);
+ instr->src_count = 2;
+ }
}
static void
-setup_input(struct ir2_context *ctx, nir_variable * in)
+setup_input(struct ir2_context *ctx, nir_variable *in)
{
- struct fd2_shader_stateobj *so = ctx->so;
- ASSERTED unsigned array_len = MAX2(glsl_get_length(in->type), 1);
- unsigned n = in->data.driver_location;
- unsigned slot = in->data.location;
+ struct fd2_shader_stateobj *so = ctx->so;
+ ASSERTED unsigned array_len = MAX2(glsl_get_length(in->type), 1);
+ unsigned n = in->data.driver_location;
+ unsigned slot = in->data.location;
- assert(array_len == 1);
+ assert(array_len == 1);
- /* handle later */
- if (ctx->so->type == MESA_SHADER_VERTEX)
- return;
+ /* handle later */
+ if (ctx->so->type == MESA_SHADER_VERTEX)
+ return;
- if (ctx->so->type != MESA_SHADER_FRAGMENT)
- compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
+ if (ctx->so->type != MESA_SHADER_FRAGMENT)
+ compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
- n = ctx->f->inputs_count++;
+ n = ctx->f->inputs_count++;
- /* half of fragcoord from param reg, half from a varying */
- if (slot == VARYING_SLOT_POS) {
- ctx->f->fragcoord = n;
- so->need_param = true;
- }
+ /* half of fragcoord from param reg, half from a varying */
+ if (slot == VARYING_SLOT_POS) {
+ ctx->f->fragcoord = n;
+ so->need_param = true;
+ }
- ctx->f->inputs[n].slot = slot;
- ctx->f->inputs[n].ncomp = glsl_get_components(in->type);
+ ctx->f->inputs[n].slot = slot;
+ ctx->f->inputs[n].ncomp = glsl_get_components(in->type);
- /* in->data.interpolation?
- * opengl ES 2.0 can't do flat mode, but we still get it from GALLIUM_HUD
- */
+ /* in->data.interpolation?
+ * opengl ES 2.0 can't do flat mode, but we still get it from GALLIUM_HUD
+ */
}
static void
-emit_undef(struct ir2_context *ctx, nir_ssa_undef_instr * undef)
+emit_undef(struct ir2_context *ctx, nir_ssa_undef_instr *undef)
{
- /* TODO we don't want to emit anything for undefs */
+ /* TODO we don't want to emit anything for undefs */
- struct ir2_instr *instr;
+ struct ir2_instr *instr;
- instr = instr_create_alu_dest(ctx, nir_op_mov,
- &(nir_dest) {.ssa = undef->def,.is_ssa = true});
- instr->src[0] = ir2_src(0, 0, IR2_SRC_CONST);
+ instr = instr_create_alu_dest(
+ ctx, nir_op_mov, &(nir_dest){.ssa = undef->def, .is_ssa = true});
+ instr->src[0] = ir2_src(0, 0, IR2_SRC_CONST);
}
static void
-emit_instr(struct ir2_context *ctx, nir_instr * instr)
+emit_instr(struct ir2_context *ctx, nir_instr *instr)
{
- switch (instr->type) {
- case nir_instr_type_alu:
- emit_alu(ctx, nir_instr_as_alu(instr));
- break;
- case nir_instr_type_deref:
- /* ignored, handled as part of the intrinsic they are src to */
- break;
- case nir_instr_type_intrinsic:
- emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
- break;
- case nir_instr_type_load_const:
- /* dealt with when using nir_src */
- break;
- case nir_instr_type_tex:
- emit_tex(ctx, nir_instr_as_tex(instr));
- break;
- case nir_instr_type_jump:
- ctx->block_has_jump[ctx->block_idx] = true;
- break;
- case nir_instr_type_ssa_undef:
- emit_undef(ctx, nir_instr_as_ssa_undef(instr));
- break;
- default:
- break;
- }
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ emit_alu(ctx, nir_instr_as_alu(instr));
+ break;
+ case nir_instr_type_deref:
+ /* ignored, handled as part of the intrinsic they are src to */
+ break;
+ case nir_instr_type_intrinsic:
+ emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
+ break;
+ case nir_instr_type_load_const:
+ /* dealt with when using nir_src */
+ break;
+ case nir_instr_type_tex:
+ emit_tex(ctx, nir_instr_as_tex(instr));
+ break;
+ case nir_instr_type_jump:
+ ctx->block_has_jump[ctx->block_idx] = true;
+ break;
+ case nir_instr_type_ssa_undef:
+ emit_undef(ctx, nir_instr_as_ssa_undef(instr));
+ break;
+ default:
+ break;
+ }
}
/* fragcoord.zw and a20x hw binning outputs */
static void
extra_position_exports(struct ir2_context *ctx, bool binning)
{
- struct ir2_instr *instr, *rcp, *sc, *wincoord, *off;
-
- if (ctx->f->fragcoord < 0 && !binning)
- return;
-
- instr = instr_create_alu(ctx, nir_op_fmax, 1);
- instr->src[0] = ctx->position;
- instr->src[0].swizzle = IR2_SWIZZLE_W;
- instr->src[1] = ir2_zero(ctx);
-
- rcp = instr_create_alu(ctx, nir_op_frcp, 1);
- rcp->src[0] = ir2_src(instr->idx, 0, IR2_SRC_SSA);
-
- sc = instr_create_alu(ctx, nir_op_fmul, 4);
- sc->src[0] = ctx->position;
- sc->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
-
- wincoord = instr_create_alu(ctx, nir_op_ffma, 4);
- wincoord->src[0] = ir2_src(66, 0, IR2_SRC_CONST);
- wincoord->src[1] = ir2_src(sc->idx, 0, IR2_SRC_SSA);
- wincoord->src[2] = ir2_src(65, 0, IR2_SRC_CONST);
-
- /* fragcoord z/w */
- if (ctx->f->fragcoord >= 0 && !binning) {
- instr = instr_create_alu(ctx, nir_op_mov, 1);
- instr->src[0] = ir2_src(wincoord->idx, IR2_SWIZZLE_Z, IR2_SRC_SSA);
- instr->alu.export = ctx->f->fragcoord;
-
- instr = instr_create_alu(ctx, nir_op_mov, 1);
- instr->src[0] = ctx->position;
- instr->src[0].swizzle = IR2_SWIZZLE_W;
- instr->alu.export = ctx->f->fragcoord;
- instr->alu.write_mask = 2;
- }
-
- if (!binning)
- return;
-
- off = instr_create_alu(ctx, nir_op_fadd, 1);
- off->src[0] = ir2_src(64, 0, IR2_SRC_CONST);
- off->src[1] = ir2_src(2, 0, IR2_SRC_INPUT);
-
- /* 8 max set in freedreno_screen.. unneeded instrs patched out */
- for (int i = 0; i < 8; i++) {
- instr = instr_create_alu(ctx, nir_op_ffma, 4);
- instr->src[0] = ir2_src(1, IR2_SWIZZLE_WYWW, IR2_SRC_CONST);
- instr->src[1] = ir2_src(off->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
- instr->src[2] = ir2_src(3 + i, 0, IR2_SRC_CONST);
- instr->alu.export = 32;
-
- instr = instr_create_alu(ctx, nir_op_ffma, 4);
- instr->src[0] = ir2_src(68 + i * 2, 0, IR2_SRC_CONST);
- instr->src[1] = ir2_src(wincoord->idx, 0, IR2_SRC_SSA);
- instr->src[2] = ir2_src(67 + i * 2, 0, IR2_SRC_CONST);
- instr->alu.export = 33;
- }
+ struct ir2_instr *instr, *rcp, *sc, *wincoord, *off;
+
+ if (ctx->f->fragcoord < 0 && !binning)
+ return;
+
+ instr = instr_create_alu(ctx, nir_op_fmax, 1);
+ instr->src[0] = ctx->position;
+ instr->src[0].swizzle = IR2_SWIZZLE_W;
+ instr->src[1] = ir2_zero(ctx);
+
+ rcp = instr_create_alu(ctx, nir_op_frcp, 1);
+ rcp->src[0] = ir2_src(instr->idx, 0, IR2_SRC_SSA);
+
+ sc = instr_create_alu(ctx, nir_op_fmul, 4);
+ sc->src[0] = ctx->position;
+ sc->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
+
+ wincoord = instr_create_alu(ctx, nir_op_ffma, 4);
+ wincoord->src[0] = ir2_src(66, 0, IR2_SRC_CONST);
+ wincoord->src[1] = ir2_src(sc->idx, 0, IR2_SRC_SSA);
+ wincoord->src[2] = ir2_src(65, 0, IR2_SRC_CONST);
+
+ /* fragcoord z/w */
+ if (ctx->f->fragcoord >= 0 && !binning) {
+ instr = instr_create_alu(ctx, nir_op_mov, 1);
+ instr->src[0] = ir2_src(wincoord->idx, IR2_SWIZZLE_Z, IR2_SRC_SSA);
+ instr->alu.export = ctx->f->fragcoord;
+
+ instr = instr_create_alu(ctx, nir_op_mov, 1);
+ instr->src[0] = ctx->position;
+ instr->src[0].swizzle = IR2_SWIZZLE_W;
+ instr->alu.export = ctx->f->fragcoord;
+ instr->alu.write_mask = 2;
+ }
+
+ if (!binning)
+ return;
+
+ off = instr_create_alu(ctx, nir_op_fadd, 1);
+ off->src[0] = ir2_src(64, 0, IR2_SRC_CONST);
+ off->src[1] = ir2_src(2, 0, IR2_SRC_INPUT);
+
+ /* 8 max set in freedreno_screen.. unneeded instrs patched out */
+ for (int i = 0; i < 8; i++) {
+ instr = instr_create_alu(ctx, nir_op_ffma, 4);
+ instr->src[0] = ir2_src(1, IR2_SWIZZLE_WYWW, IR2_SRC_CONST);
+ instr->src[1] = ir2_src(off->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
+ instr->src[2] = ir2_src(3 + i, 0, IR2_SRC_CONST);
+ instr->alu.export = 32;
+
+ instr = instr_create_alu(ctx, nir_op_ffma, 4);
+ instr->src[0] = ir2_src(68 + i * 2, 0, IR2_SRC_CONST);
+ instr->src[1] = ir2_src(wincoord->idx, 0, IR2_SRC_SSA);
+ instr->src[2] = ir2_src(67 + i * 2, 0, IR2_SRC_CONST);
+ instr->alu.export = 33;
+ }
}
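In plainer terms, the sequence above computes the following (the C64-C68
constant-register roles are inferred from this code, not from documentation):

   /* sc       = position * rcp(max(position.w, 0))      perspective divide  */
   /* wincoord = C66 * sc + C65                          viewport scale+bias */
   /* a20x binning: off = C64 + input[2], then for each i:                   */
   /*   export 32 = C1.wyww * off.xxxx + C[3+i]                              */
   /*   export 33 = C[68+2i] * wincoord + C[67+2i]                           */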
static bool emit_cf_list(struct ir2_context *ctx, struct exec_list *list);
static bool
-emit_block(struct ir2_context *ctx, nir_block * block)
+emit_block(struct ir2_context *ctx, nir_block *block)
{
- struct ir2_instr *instr;
- nir_block *succs = block->successors[0];
+ struct ir2_instr *instr;
+ nir_block *succs = block->successors[0];
- ctx->block_idx = block->index;
+ ctx->block_idx = block->index;
- nir_foreach_instr(instr, block)
- emit_instr(ctx, instr);
+ nir_foreach_instr (instr, block)
+ emit_instr(ctx, instr);
- if (!succs || !succs->index)
- return false;
+ if (!succs || !succs->index)
+ return false;
- /* we want to be smart and always jump and have the backend cleanup
- * but we are not, so there are two cases where jump is needed:
- * loops (succs index lower)
- * jumps (jump instruction seen in block)
- */
- if (succs->index > block->index && !ctx->block_has_jump[block->index])
- return false;
+ /* we want to be smart and always jump and have the backend cleanup
+ * but we are not, so there are two cases where jump is needed:
+ * loops (succs index lower)
+ * jumps (jump instruction seen in block)
+ */
+ if (succs->index > block->index && !ctx->block_has_jump[block->index])
+ return false;
- assert(block->successors[1] == NULL);
+ assert(block->successors[1] == NULL);
- instr = ir2_instr_create(ctx, IR2_CF);
- instr->cf.block_idx = succs->index;
- /* XXX can't jump to a block with different predicate */
- return true;
+ instr = ir2_instr_create(ctx, IR2_CF);
+ instr->cf.block_idx = succs->index;
+ /* XXX can't jump to a block with different predicate */
+ return true;
}
static void
-emit_if(struct ir2_context *ctx, nir_if * nif)
+emit_if(struct ir2_context *ctx, nir_if *nif)
{
- unsigned pred = ctx->pred, pred_idx = ctx->pred_idx;
- struct ir2_instr *instr;
-
- /* XXX: blob seems to always use same register for condition */
-
- instr = ir2_instr_create(ctx, IR2_ALU);
- instr->src[0] = make_src(ctx, nif->condition);
- instr->src_count = 1;
- instr->ssa.ncomp = 1;
- instr->alu.vector_opc = VECTOR_NONE;
- instr->alu.scalar_opc = SCALAR_NONE;
- instr->alu.export = -1;
- instr->alu.write_mask = 1;
- instr->pred = 0;
-
- /* if nested, use PRED_SETNE_PUSHv */
- if (pred) {
- instr->alu.vector_opc = PRED_SETNE_PUSHv;
- instr->src[1] = instr->src[0];
- instr->src[0] = ir2_src(pred_idx, 0, IR2_SRC_SSA);
- instr->src[0].swizzle = IR2_SWIZZLE_XXXX;
- instr->src[1].swizzle = IR2_SWIZZLE_XXXX;
- instr->src_count = 2;
- } else {
- instr->alu.scalar_opc = PRED_SETNEs;
- }
-
- ctx->pred_idx = instr->idx;
- ctx->pred = 3;
-
- emit_cf_list(ctx, &nif->then_list);
-
- /* TODO: if there is no else branch we don't need this
- * and if the else branch is simple, can just flip ctx->pred instead
- */
- instr = ir2_instr_create(ctx, IR2_ALU);
- instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
- instr->src_count = 1;
- instr->ssa.ncomp = 1;
- instr->alu.vector_opc = VECTOR_NONE;
- instr->alu.scalar_opc = PRED_SET_INVs;
- instr->alu.export = -1;
- instr->alu.write_mask = 1;
- instr->pred = 0;
- ctx->pred_idx = instr->idx;
-
- emit_cf_list(ctx, &nif->else_list);
-
- /* restore predicate for nested predicates */
- if (pred) {
- instr = ir2_instr_create(ctx, IR2_ALU);
- instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
- instr->src_count = 1;
- instr->ssa.ncomp = 1;
- instr->alu.vector_opc = VECTOR_NONE;
- instr->alu.scalar_opc = PRED_SET_POPs;
- instr->alu.export = -1;
- instr->alu.write_mask = 1;
- instr->pred = 0;
- ctx->pred_idx = instr->idx;
- }
-
- /* restore ctx->pred */
- ctx->pred = pred;
+ unsigned pred = ctx->pred, pred_idx = ctx->pred_idx;
+ struct ir2_instr *instr;
+
+ /* XXX: blob seems to always use same register for condition */
+
+ instr = ir2_instr_create(ctx, IR2_ALU);
+ instr->src[0] = make_src(ctx, nif->condition);
+ instr->src_count = 1;
+ instr->ssa.ncomp = 1;
+ instr->alu.vector_opc = VECTOR_NONE;
+ instr->alu.scalar_opc = SCALAR_NONE;
+ instr->alu.export = -1;
+ instr->alu.write_mask = 1;
+ instr->pred = 0;
+
+ /* if nested, use PRED_SETNE_PUSHv */
+ if (pred) {
+ instr->alu.vector_opc = PRED_SETNE_PUSHv;
+ instr->src[1] = instr->src[0];
+ instr->src[0] = ir2_src(pred_idx, 0, IR2_SRC_SSA);
+ instr->src[0].swizzle = IR2_SWIZZLE_XXXX;
+ instr->src[1].swizzle = IR2_SWIZZLE_XXXX;
+ instr->src_count = 2;
+ } else {
+ instr->alu.scalar_opc = PRED_SETNEs;
+ }
+
+ ctx->pred_idx = instr->idx;
+ ctx->pred = 3;
+
+ emit_cf_list(ctx, &nif->then_list);
+
+ /* TODO: if there is no else branch we don't need this
+ * and if the else branch is simple, can just flip ctx->pred instead
+ */
+ instr = ir2_instr_create(ctx, IR2_ALU);
+ instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
+ instr->src_count = 1;
+ instr->ssa.ncomp = 1;
+ instr->alu.vector_opc = VECTOR_NONE;
+ instr->alu.scalar_opc = PRED_SET_INVs;
+ instr->alu.export = -1;
+ instr->alu.write_mask = 1;
+ instr->pred = 0;
+ ctx->pred_idx = instr->idx;
+
+ emit_cf_list(ctx, &nif->else_list);
+
+ /* restore predicate for nested predicates */
+ if (pred) {
+ instr = ir2_instr_create(ctx, IR2_ALU);
+ instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
+ instr->src_count = 1;
+ instr->ssa.ncomp = 1;
+ instr->alu.vector_opc = VECTOR_NONE;
+ instr->alu.scalar_opc = PRED_SET_POPs;
+ instr->alu.export = -1;
+ instr->alu.write_mask = 1;
+ instr->pred = 0;
+ ctx->pred_idx = instr->idx;
+ }
+
+ /* restore ctx->pred */
+ ctx->pred = pred;
}
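For a nested if, the emitted predicate sequence looks roughly like this
(opcode names as used above, operands elided):

   PRED_SETNE_PUSHv outer_pred, cond   ; push and set the new predicate
   ... then_list, pred = 3 ...
   PRED_SET_INVs                       ; flip the predicate for the else side
   ... else_list ...
   PRED_SET_POPs                       ; restore the outer predicate

At the top level, PRED_SETNEs replaces the PUSH variant and the final POP is
omitted.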
/* get the highest block idx in the loop, so we know when
 * registers used inside the loop can be freed
 */
static unsigned
loop_last_block(struct exec_list *list)
{
- nir_cf_node *node =
- exec_node_data(nir_cf_node, exec_list_get_tail(list), node);
- switch (node->type) {
- case nir_cf_node_block:
- return nir_cf_node_as_block(node)->index;
- case nir_cf_node_if:
- assert(0); /* XXX could this ever happen? */
- return 0;
- case nir_cf_node_loop:
- return loop_last_block(&nir_cf_node_as_loop(node)->body);
- default:
- compile_error(ctx, "Not supported\n");
- return 0;
- }
+ nir_cf_node *node =
+ exec_node_data(nir_cf_node, exec_list_get_tail(list), node);
+ switch (node->type) {
+ case nir_cf_node_block:
+ return nir_cf_node_as_block(node)->index;
+ case nir_cf_node_if:
+ assert(0); /* XXX could this ever happen? */
+ return 0;
+ case nir_cf_node_loop:
+ return loop_last_block(&nir_cf_node_as_loop(node)->body);
+ default:
+ compile_error(ctx, "Not supported\n");
+ return 0;
+ }
}
static void
emit_loop(struct ir2_context *ctx, nir_loop *nloop)
{
- ctx->loop_last_block[++ctx->loop_depth] = loop_last_block(&nloop->body);
- emit_cf_list(ctx, &nloop->body);
- ctx->loop_depth--;
+ ctx->loop_last_block[++ctx->loop_depth] = loop_last_block(&nloop->body);
+ emit_cf_list(ctx, &nloop->body);
+ ctx->loop_depth--;
}
static bool
emit_cf_list(struct ir2_context *ctx, struct exec_list *list)
{
- bool ret = false;
- foreach_list_typed(nir_cf_node, node, node, list) {
- ret = false;
- switch (node->type) {
- case nir_cf_node_block:
- ret = emit_block(ctx, nir_cf_node_as_block(node));
- break;
- case nir_cf_node_if:
- emit_if(ctx, nir_cf_node_as_if(node));
- break;
- case nir_cf_node_loop:
- emit_loop(ctx, nir_cf_node_as_loop(node));
- break;
- case nir_cf_node_function:
- compile_error(ctx, "Not supported\n");
- break;
- }
- }
- return ret;
+ bool ret = false;
+ foreach_list_typed (nir_cf_node, node, node, list) {
+ ret = false;
+ switch (node->type) {
+ case nir_cf_node_block:
+ ret = emit_block(ctx, nir_cf_node_as_block(node));
+ break;
+ case nir_cf_node_if:
+ emit_if(ctx, nir_cf_node_as_if(node));
+ break;
+ case nir_cf_node_loop:
+ emit_loop(ctx, nir_cf_node_as_loop(node));
+ break;
+ case nir_cf_node_function:
+ compile_error(ctx, "Not supported\n");
+ break;
+ }
+ }
+ return ret;
}
-static void cleanup_binning(struct ir2_context *ctx)
+static void
+cleanup_binning(struct ir2_context *ctx)
{
- assert(ctx->so->type == MESA_SHADER_VERTEX);
+ assert(ctx->so->type == MESA_SHADER_VERTEX);
- /* kill non-position outputs for binning variant */
- nir_foreach_block(block, nir_shader_get_entrypoint(ctx->nir)) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
+ /* kill non-position outputs for binning variant */
+ nir_foreach_block (block, nir_shader_get_entrypoint(ctx->nir)) {
+ nir_foreach_instr_safe (instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
- nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (intr->intrinsic != nir_intrinsic_store_output)
- continue;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ continue;
- if (output_slot(ctx, intr) != VARYING_SLOT_POS)
- nir_instr_remove(instr);
- }
- }
+ if (output_slot(ctx, intr) != VARYING_SLOT_POS)
+ nir_instr_remove(instr);
+ }
+ }
- ir2_optimize_nir(ctx->nir, false);
+ ir2_optimize_nir(ctx->nir, false);
}
static bool
ir2_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
{
- if (instr->type != nir_instr_type_alu)
- return false;
-
- nir_alu_instr *alu = nir_instr_as_alu(instr);
- switch (alu->op) {
- case nir_op_frsq:
- case nir_op_frcp:
- case nir_op_flog2:
- case nir_op_fexp2:
- case nir_op_fsqrt:
- case nir_op_fcos:
- case nir_op_fsin:
- return true;
- default:
- break;
- }
-
- return false;
+ if (instr->type != nir_instr_type_alu)
+ return false;
+
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ switch (alu->op) {
+ case nir_op_frsq:
+ case nir_op_frcp:
+ case nir_op_flog2:
+ case nir_op_fexp2:
+ case nir_op_fsqrt:
+ case nir_op_fcos:
+ case nir_op_fsin:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
}
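Worth noting: the ops kept scalar here are the transcendentals, presumably
because the a2xx scalar ALU provides them only as single-component
instructions, so nir_lower_alu_to_scalar splits just these and leaves
ordinary vector ALU ops intact.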
void
ir2_nir_compile(struct ir2_context *ctx, bool binning)
{
- struct fd2_shader_stateobj *so = ctx->so;
+ struct fd2_shader_stateobj *so = ctx->so;
- memset(ctx->ssa_map, 0xff, sizeof(ctx->ssa_map));
+ memset(ctx->ssa_map, 0xff, sizeof(ctx->ssa_map));
- ctx->nir = nir_shader_clone(NULL, so->nir);
+ ctx->nir = nir_shader_clone(NULL, so->nir);
- if (binning)
- cleanup_binning(ctx);
+ if (binning)
+ cleanup_binning(ctx);
- OPT_V(ctx->nir, nir_copy_prop);
- OPT_V(ctx->nir, nir_opt_dce);
- OPT_V(ctx->nir, nir_opt_move, nir_move_comparisons);
+ OPT_V(ctx->nir, nir_copy_prop);
+ OPT_V(ctx->nir, nir_opt_dce);
+ OPT_V(ctx->nir, nir_opt_move, nir_move_comparisons);
- OPT_V(ctx->nir, nir_lower_int_to_float);
- OPT_V(ctx->nir, nir_lower_bool_to_float);
- while(OPT(ctx->nir, nir_opt_algebraic));
- OPT_V(ctx->nir, nir_opt_algebraic_late);
- OPT_V(ctx->nir, nir_lower_to_source_mods, nir_lower_all_source_mods);
+ OPT_V(ctx->nir, nir_lower_int_to_float);
+ OPT_V(ctx->nir, nir_lower_bool_to_float);
+ while (OPT(ctx->nir, nir_opt_algebraic))
+ ;
+ OPT_V(ctx->nir, nir_opt_algebraic_late);
+ OPT_V(ctx->nir, nir_lower_to_source_mods, nir_lower_all_source_mods);
- OPT_V(ctx->nir, nir_lower_alu_to_scalar, ir2_alu_to_scalar_filter_cb, NULL);
+ OPT_V(ctx->nir, nir_lower_alu_to_scalar, ir2_alu_to_scalar_filter_cb, NULL);
- OPT_V(ctx->nir, nir_lower_locals_to_regs);
+ OPT_V(ctx->nir, nir_lower_locals_to_regs);
- OPT_V(ctx->nir, nir_convert_from_ssa, true);
+ OPT_V(ctx->nir, nir_convert_from_ssa, true);
- OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest);
- OPT_V(ctx->nir, nir_lower_vec_to_movs, NULL, NULL);
+ OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest);
+ OPT_V(ctx->nir, nir_lower_vec_to_movs, NULL, NULL);
- OPT_V(ctx->nir, nir_opt_dce);
+ OPT_V(ctx->nir, nir_opt_dce);
- nir_sweep(ctx->nir);
+ nir_sweep(ctx->nir);
- if (FD_DBG(DISASM)) {
- debug_printf("----------------------\n");
- nir_print_shader(ctx->nir, stdout);
- debug_printf("----------------------\n");
- }
+ if (FD_DBG(DISASM)) {
+ debug_printf("----------------------\n");
+ nir_print_shader(ctx->nir, stdout);
+ debug_printf("----------------------\n");
+ }
- /* fd2_shader_stateobj init */
- if (so->type == MESA_SHADER_FRAGMENT) {
- ctx->f->fragcoord = -1;
- ctx->f->inputs_count = 0;
- memset(ctx->f->inputs, 0, sizeof(ctx->f->inputs));
- }
+ /* fd2_shader_stateobj init */
+ if (so->type == MESA_SHADER_FRAGMENT) {
+ ctx->f->fragcoord = -1;
+ ctx->f->inputs_count = 0;
+ memset(ctx->f->inputs, 0, sizeof(ctx->f->inputs));
+ }
- /* Setup inputs: */
- nir_foreach_shader_in_variable(in, ctx->nir)
- setup_input(ctx, in);
+ /* Setup inputs: */
+ nir_foreach_shader_in_variable (in, ctx->nir)
+ setup_input(ctx, in);
- if (so->type == MESA_SHADER_FRAGMENT) {
- unsigned idx;
- for (idx = 0; idx < ctx->f->inputs_count; idx++) {
- ctx->input[idx].ncomp = ctx->f->inputs[idx].ncomp;
- update_range(ctx, &ctx->input[idx]);
- }
- /* assume we have param input and kill it later if not */
- ctx->input[idx].ncomp = 4;
- update_range(ctx, &ctx->input[idx]);
- } else {
- ctx->input[0].ncomp = 1;
- ctx->input[2].ncomp = 1;
- update_range(ctx, &ctx->input[0]);
- update_range(ctx, &ctx->input[2]);
- }
+ if (so->type == MESA_SHADER_FRAGMENT) {
+ unsigned idx;
+ for (idx = 0; idx < ctx->f->inputs_count; idx++) {
+ ctx->input[idx].ncomp = ctx->f->inputs[idx].ncomp;
+ update_range(ctx, &ctx->input[idx]);
+ }
+ /* assume we have param input and kill it later if not */
+ ctx->input[idx].ncomp = 4;
+ update_range(ctx, &ctx->input[idx]);
+ } else {
+ ctx->input[0].ncomp = 1;
+ ctx->input[2].ncomp = 1;
+ update_range(ctx, &ctx->input[0]);
+ update_range(ctx, &ctx->input[2]);
+ }
- /* And emit the body: */
- nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->nir);
+ /* And emit the body: */
+ nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->nir);
- nir_foreach_register(reg, &fxn->registers) {
- ctx->reg[reg->index].ncomp = reg->num_components;
- ctx->reg_count = MAX2(ctx->reg_count, reg->index + 1);
- }
+ nir_foreach_register (reg, &fxn->registers) {
+ ctx->reg[reg->index].ncomp = reg->num_components;
+ ctx->reg_count = MAX2(ctx->reg_count, reg->index + 1);
+ }
- nir_metadata_require(fxn, nir_metadata_block_index);
- emit_cf_list(ctx, &fxn->body);
- /* TODO emit_block(ctx, fxn->end_block); */
+ nir_metadata_require(fxn, nir_metadata_block_index);
+ emit_cf_list(ctx, &fxn->body);
+ /* TODO emit_block(ctx, fxn->end_block); */
- if (so->type == MESA_SHADER_VERTEX)
- extra_position_exports(ctx, binning);
+ if (so->type == MESA_SHADER_VERTEX)
+ extra_position_exports(ctx, binning);
- ralloc_free(ctx->nir);
+ ralloc_free(ctx->nir);
- /* kill unused param input */
- if (so->type == MESA_SHADER_FRAGMENT && !so->need_param)
- ctx->input[ctx->f->inputs_count].initialized = false;
+ /* kill unused param input */
+ if (so->type == MESA_SHADER_FRAGMENT && !so->need_param)
+ ctx->input[ctx->f->inputs_count].initialized = false;
}
* Jonathan Marek <jonathan@marek.ca>
*/
-#include <stdlib.h>
+#include <assert.h>
#include <stdint.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
-#include <assert.h>
-#include "ir2.h"
-#include "fd2_program.h"
#include "ir2/instr-a2xx.h"
+#include "fd2_program.h"
+#include "ir2.h"
enum ir2_src_type {
- IR2_SRC_SSA,
- IR2_SRC_REG,
- IR2_SRC_INPUT,
- IR2_SRC_CONST,
+ IR2_SRC_SSA,
+ IR2_SRC_REG,
+ IR2_SRC_INPUT,
+ IR2_SRC_CONST,
};
struct ir2_src {
- /* num can mean different things
- * ssa: index of instruction
- * reg: index in ctx->reg array
- * input: index in ctx->input array
- * const: constant index (C0, C1, etc)
- */
- uint16_t num;
- uint8_t swizzle;
- enum ir2_src_type type : 2;
- uint8_t abs : 1;
- uint8_t negate : 1;
- uint8_t : 4;
+ /* num can mean different things
+ * ssa: index of instruction
+ * reg: index in ctx->reg array
+ * input: index in ctx->input array
+ * const: constant index (C0, C1, etc)
+ */
+ uint16_t num;
+ uint8_t swizzle;
+ enum ir2_src_type type : 2;
+ uint8_t abs : 1;
+ uint8_t negate : 1;
+ uint8_t : 4;
};
struct ir2_reg_component {
- uint8_t c : 3; /* assigned x/y/z/w (7=dont write, for fetch instr) */
- bool alloc : 1; /* is it currently allocated */
- uint8_t ref_count; /* for ra */
+ uint8_t c : 3; /* assigned x/y/z/w (7=dont write, for fetch instr) */
+ bool alloc : 1; /* is it currently allocated */
+ uint8_t ref_count; /* for ra */
};
struct ir2_reg {
- uint8_t idx; /* assigned hardware register */
- uint8_t ncomp;
-
- uint8_t loop_depth;
- bool initialized;
- /* block_idx to free on (-1 = free on ref_count==0) */
- int block_idx_free;
- struct ir2_reg_component comp[4];
+ uint8_t idx; /* assigned hardware register */
+ uint8_t ncomp;
+
+ uint8_t loop_depth;
+ bool initialized;
+ /* block_idx to free on (-1 = free on ref_count==0) */
+ int block_idx_free;
+ struct ir2_reg_component comp[4];
};
struct ir2_instr {
- unsigned idx;
-
- unsigned block_idx;
-
- enum {
- IR2_NONE,
- IR2_FETCH,
- IR2_ALU,
- IR2_CF,
- } type : 2;
-
- /* instruction needs to be emitted (for scheduling) */
- bool need_emit : 1;
-
- /* predicate value - (usually) same for entire block */
- uint8_t pred : 2;
-
- /* src */
- uint8_t src_count;
- struct ir2_src src[4];
-
- /* dst */
- bool is_ssa;
- union {
- struct ir2_reg ssa;
- struct ir2_reg *reg;
- };
-
- /* type-specific */
- union {
- struct {
- instr_fetch_opc_t opc : 5;
- union {
- struct {
- uint8_t const_idx;
- uint8_t const_idx_sel;
- } vtx;
- struct {
- bool is_cube : 1;
- bool is_rect : 1;
- uint8_t samp_id;
- } tex;
- };
- } fetch;
- struct {
- /* store possible opcs, then we can choose vector/scalar instr */
- instr_scalar_opc_t scalar_opc : 6;
- instr_vector_opc_t vector_opc : 5;
- /* same as nir */
- uint8_t write_mask : 4;
- bool saturate : 1;
-
- /* export idx (-1 no export) */
- int8_t export;
-
- /* for scalarized 2 src instruction */
- uint8_t src1_swizzle;
- } alu;
- struct {
- /* jmp dst block_idx */
- uint8_t block_idx;
- } cf;
- };
+ unsigned idx;
+
+ unsigned block_idx;
+
+ enum {
+ IR2_NONE,
+ IR2_FETCH,
+ IR2_ALU,
+ IR2_CF,
+ } type : 2;
+
+ /* instruction needs to be emitted (for scheduling) */
+ bool need_emit : 1;
+
+ /* predicate value - (usually) same for entire block */
+ uint8_t pred : 2;
+
+ /* src */
+ uint8_t src_count;
+ struct ir2_src src[4];
+
+ /* dst */
+ bool is_ssa;
+ union {
+ struct ir2_reg ssa;
+ struct ir2_reg *reg;
+ };
+
+ /* type-specific */
+ union {
+ struct {
+ instr_fetch_opc_t opc : 5;
+ union {
+ struct {
+ uint8_t const_idx;
+ uint8_t const_idx_sel;
+ } vtx;
+ struct {
+ bool is_cube : 1;
+ bool is_rect : 1;
+ uint8_t samp_id;
+ } tex;
+ };
+ } fetch;
+ struct {
+ /* store possible opcs, then we can choose vector/scalar instr */
+ instr_scalar_opc_t scalar_opc : 6;
+ instr_vector_opc_t vector_opc : 5;
+ /* same as nir */
+ uint8_t write_mask : 4;
+ bool saturate : 1;
+
+ /* export idx (-1 no export) */
+ int8_t export;
+
+ /* for scalarized 2 src instruction */
+ uint8_t src1_swizzle;
+ } alu;
+ struct {
+ /* jmp dst block_idx */
+ uint8_t block_idx;
+ } cf;
+ };
};
struct ir2_sched_instr {
- uint32_t reg_state[8];
- struct ir2_instr *instr, *instr_s;
+ uint32_t reg_state[8];
+ struct ir2_instr *instr, *instr_s;
};
struct ir2_context {
- struct fd2_shader_stateobj *so;
+ struct fd2_shader_stateobj *so;
- unsigned block_idx, pred_idx;
- uint8_t pred;
- bool block_has_jump[64];
+ unsigned block_idx, pred_idx;
+ uint8_t pred;
+ bool block_has_jump[64];
- unsigned loop_last_block[64];
- unsigned loop_depth;
+ unsigned loop_last_block[64];
+ unsigned loop_depth;
- nir_shader *nir;
+ nir_shader *nir;
- /* ssa index of position output */
- struct ir2_src position;
+ /* ssa index of position output */
+ struct ir2_src position;
- /* to translate SSA ids to instruction ids */
- int16_t ssa_map[1024];
+ /* to translate SSA ids to instruction ids */
+ int16_t ssa_map[1024];
- struct ir2_shader_info *info;
- struct ir2_frag_linkage *f;
+ struct ir2_shader_info *info;
+ struct ir2_frag_linkage *f;
- int prev_export;
+ int prev_export;
- /* RA state */
- struct ir2_reg* live_regs[64];
- uint32_t reg_state[256/32]; /* 64*4 bits */
+ /* RA state */
+ struct ir2_reg *live_regs[64];
+ uint32_t reg_state[256 / 32]; /* 64*4 bits */
- /* inputs */
- struct ir2_reg input[16 + 1]; /* 16 + param */
+ /* inputs */
+ struct ir2_reg input[16 + 1]; /* 16 + param */
- /* non-ssa regs */
- struct ir2_reg reg[64];
- unsigned reg_count;
+ /* non-ssa regs */
+ struct ir2_reg reg[64];
+ unsigned reg_count;
- struct ir2_instr instr[0x300];
- unsigned instr_count;
+ struct ir2_instr instr[0x300];
+ unsigned instr_count;
- struct ir2_sched_instr instr_sched[0x180];
- unsigned instr_sched_count;
+ struct ir2_sched_instr instr_sched[0x180];
+ unsigned instr_sched_count;
};
void assemble(struct ir2_context *ctx, bool binning);
void ir2_nir_compile(struct ir2_context *ctx, bool binning);
-bool ir2_nir_lower_scalar(nir_shader * shader);
+bool ir2_nir_lower_scalar(nir_shader *shader);
void ra_count_refs(struct ir2_context *ctx);
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
- bool export, uint8_t export_writemask);
+ bool export, uint8_t export_writemask);
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
void ra_block_free(struct ir2_context *ctx, unsigned block);
/* utils */
enum {
- IR2_SWIZZLE_Y = 1 << 0,
- IR2_SWIZZLE_Z = 2 << 0,
- IR2_SWIZZLE_W = 3 << 0,
-
- IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2,
-
- IR2_SWIZZLE_YXW = 1 << 0 | 3 << 2 | 1 << 4,
-
- IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6,
- IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6,
- IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6,
- IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6,
- IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6,
- IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6,
- IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6,
- IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6,
+ IR2_SWIZZLE_Y = 1 << 0,
+ IR2_SWIZZLE_Z = 2 << 0,
+ IR2_SWIZZLE_W = 3 << 0,
+
+ IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2,
+
+ IR2_SWIZZLE_YXW = 1 << 0 | 3 << 2 | 1 << 4,
+
+ IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6,
+ IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6,
+ IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6,
+ IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6,
+ IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6,
+ IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6,
+ IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6,
+ IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6,
};
-#define compile_error(ctx, args...) ({ \
- printf(args); \
- assert(0); \
-})
+#define compile_error(ctx, args...) \
+ ({ \
+ printf(args); \
+ assert(0); \
+ })
static inline struct ir2_src
ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)
{
- return (struct ir2_src) {
- .num = num,
- .swizzle = swizzle,
- .type = type
- };
+ return (struct ir2_src){.num = num, .swizzle = swizzle, .type = type};
}
/* ir2_assemble uses it .. */
struct ir2_src ir2_zero(struct ir2_context *ctx);
-#define ir2_foreach_instr(it, ctx) \
- for (struct ir2_instr *it = (ctx)->instr; ({ \
- while (it != &(ctx)->instr[(ctx)->instr_count] && it->type == IR2_NONE) it++; \
- it != &(ctx)->instr[(ctx)->instr_count]; }); it++)
-
-#define ir2_foreach_live_reg(it, ctx) \
- for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \
- while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) __ptr++; \
- __ptr != &(ctx)->live_regs[64] ? (it=*__ptr) : NULL; }); it++)
-
-#define ir2_foreach_avail(it) \
- for (struct ir2_instr **__instrp = avail, *it; \
- it = *__instrp, __instrp != &avail[avail_count]; __instrp++)
-
-#define ir2_foreach_src(it, instr) \
- for (struct ir2_src *it = instr->src; \
- it != &instr->src[instr->src_count]; it++)
+#define ir2_foreach_instr(it, ctx) \
+ for (struct ir2_instr *it = (ctx)->instr; ({ \
+ while (it != &(ctx)->instr[(ctx)->instr_count] && \
+ it->type == IR2_NONE) \
+ it++; \
+ it != &(ctx)->instr[(ctx)->instr_count]; \
+ }); \
+ it++)
+
+#define ir2_foreach_live_reg(it, ctx) \
+ for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \
+ while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) \
+ __ptr++; \
+ __ptr != &(ctx)->live_regs[64] ? (it = *__ptr) : NULL; \
+ }); \
+ it++)
+
+#define ir2_foreach_avail(it) \
+ for (struct ir2_instr **__instrp = avail, *it; \
+ it = *__instrp, __instrp != &avail[avail_count]; __instrp++)
+
+#define ir2_foreach_src(it, instr) \
+ for (struct ir2_src *it = instr->src; it != &instr->src[instr->src_count]; \
+ it++)
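A side note on these iterator macros: the skip logic runs inside a GNU
statement expression (({ ... })) used as the for-loop condition, the same
extension the compile_error macro relies on, so this header is
gcc/clang-specific.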
/* mask for register allocation
* 64 registers with 4 components each = 256 bits
*/
/* typedef struct {
- uint64_t data[4];
+ uint64_t data[4];
} regmask_t; */
-static inline bool mask_isset(uint32_t * mask, unsigned num)
+static inline bool
+mask_isset(uint32_t *mask, unsigned num)
{
- return ! !(mask[num / 32] & 1 << num % 32);
+ return !!(mask[num / 32] & 1 << num % 32);
}
-static inline void mask_set(uint32_t * mask, unsigned num)
+static inline void
+mask_set(uint32_t *mask, unsigned num)
{
- mask[num / 32] |= 1 << num % 32;
+ mask[num / 32] |= 1 << num % 32;
}
-static inline void mask_unset(uint32_t * mask, unsigned num)
+static inline void
+mask_unset(uint32_t *mask, unsigned num)
{
- mask[num / 32] &= ~(1 << num % 32);
+ mask[num / 32] &= ~(1 << num % 32);
}
-static inline unsigned mask_reg(uint32_t * mask, unsigned num)
+static inline unsigned
+mask_reg(uint32_t *mask, unsigned num)
{
- return mask[num / 8] >> num % 8 * 4 & 0xf;
+ return mask[num / 8] >> num % 8 * 4 & 0xf;
}
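A sketch of how the two mask encodings relate (reading of the layout: one
4-bit component mask per register, eight registers per 32-bit word, 256 bits
total):

   /* component z of register 10 is flat bit 10*4 + 2 == 42: */
   mask_set(mask, 10 * 4 + 2);   /* mask[1] |= 1 << 10         */
   mask_reg(mask, 10);           /* mask[1] >> 8 & 0xf, == 0x4 */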
-static inline bool is_export(struct ir2_instr *instr)
+static inline bool
+is_export(struct ir2_instr *instr)
{
- return instr->type == IR2_ALU && instr->alu.export >= 0;
+ return instr->type == IR2_ALU && instr->alu.export >= 0;
}
-static inline instr_alloc_type_t export_buf(unsigned num)
+static inline instr_alloc_type_t
+export_buf(unsigned num)
{
- return num < 32 ? SQ_PARAMETER_PIXEL :
- num >= 62 ? SQ_POSITION : SQ_MEMORY;
+ return num < 32 ? SQ_PARAMETER_PIXEL : num >= 62 ? SQ_POSITION : SQ_MEMORY;
}
/* component c for channel i */
-static inline unsigned swiz_set(unsigned c, unsigned i)
+static inline unsigned
+swiz_set(unsigned c, unsigned i)
{
- return ((c - i) & 3) << i * 2;
+ return ((c - i) & 3) << i * 2;
}
/* get swizzle in channel i */
-static inline unsigned swiz_get(unsigned swiz, unsigned i)
+static inline unsigned
+swiz_get(unsigned swiz, unsigned i)
{
- return ((swiz >> i * 2) + i) & 3;
+ return ((swiz >> i * 2) + i) & 3;
}
-static inline unsigned swiz_merge(unsigned swiz0, unsigned swiz1)
+static inline unsigned
+swiz_merge(unsigned swiz0, unsigned swiz1)
{
- unsigned swiz = 0;
- for (int i = 0; i < 4; i++)
- swiz |= swiz_set(swiz_get(swiz0, swiz_get(swiz1, i)), i);
- return swiz;
+ unsigned swiz = 0;
+ for (int i = 0; i < 4; i++)
+ swiz |= swiz_set(swiz_get(swiz0, swiz_get(swiz1, i)), i);
+ return swiz;
}
-static inline void swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
+static inline void
+swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
{
- unsigned swiz = 0;
- for (int i = 0; i < 4; i++)
- swiz |= swiz_set(swiz_get(*swiz0, swiz_get(swiz1, i)), i);
- *swiz0 = swiz;
+ unsigned swiz = 0;
+ for (int i = 0; i < 4; i++)
+ swiz |= swiz_set(swiz_get(*swiz0, swiz_get(swiz1, i)), i);
+ *swiz0 = swiz;
}
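A concrete composition example using values from the enum above: result
channel i reads swiz_get(swiz0, swiz_get(swiz1, i)), i.e. swiz1 is applied on
top of the existing swiz0, so broadcasting channel x of an already-swizzled
source gives:

   swiz_merge(IR2_SWIZZLE_Y, IR2_SWIZZLE_XXXX) == IR2_SWIZZLE_YYYY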
-static inline struct ir2_reg * get_reg(struct ir2_instr *instr)
+static inline struct ir2_reg *
+get_reg(struct ir2_instr *instr)
{
- return instr->is_ssa ? &instr->ssa : instr->reg;
+ return instr->is_ssa ? &instr->ssa : instr->reg;
}
static inline struct ir2_reg *
get_reg_src(struct ir2_context *ctx, struct ir2_src *src)
{
- switch (src->type) {
- case IR2_SRC_INPUT:
- return &ctx->input[src->num];
- case IR2_SRC_SSA:
- return &ctx->instr[src->num].ssa;
- case IR2_SRC_REG:
- return &ctx->reg[src->num];
- default:
- return NULL;
- }
+ switch (src->type) {
+ case IR2_SRC_INPUT:
+ return &ctx->input[src->num];
+ case IR2_SRC_SSA:
+ return &ctx->instr[src->num].ssa;
+ case IR2_SRC_REG:
+ return &ctx->reg[src->num];
+ default:
+ return NULL;
+ }
}
/* gets a ncomp value for the dst */
-static inline unsigned dst_ncomp(struct ir2_instr *instr)
+static inline unsigned
+dst_ncomp(struct ir2_instr *instr)
{
- if (instr->is_ssa)
- return instr->ssa.ncomp;
+ if (instr->is_ssa)
+ return instr->ssa.ncomp;
- if (instr->type == IR2_FETCH)
- return instr->reg->ncomp;
+ if (instr->type == IR2_FETCH)
+ return instr->reg->ncomp;
- assert(instr->type == IR2_ALU);
+ assert(instr->type == IR2_ALU);
- unsigned ncomp = 0;
- for (int i = 0; i < instr->reg->ncomp; i++)
- ncomp += !!(instr->alu.write_mask & 1 << i);
- return ncomp;
+ unsigned ncomp = 0;
+ for (int i = 0; i < instr->reg->ncomp; i++)
+ ncomp += !!(instr->alu.write_mask & 1 << i);
+ return ncomp;
}
/* gets a ncomp value for the src registers */
-static inline unsigned src_ncomp(struct ir2_instr *instr)
+static inline unsigned
+src_ncomp(struct ir2_instr *instr)
{
- if (instr->type == IR2_FETCH) {
- switch (instr->fetch.opc) {
- case VTX_FETCH:
- return 1;
- case TEX_FETCH:
- return instr->fetch.tex.is_cube ? 3 : 2;
- case TEX_SET_TEX_LOD:
- return 1;
- default:
- assert(0);
- }
- }
-
- switch (instr->alu.scalar_opc) {
- case PRED_SETEs ... KILLONEs:
- return 1;
- default:
- break;
- }
-
- switch (instr->alu.vector_opc) {
- case DOT2ADDv:
- return 2;
- case DOT3v:
- return 3;
- case DOT4v:
- case CUBEv:
- case PRED_SETE_PUSHv:
- return 4;
- default:
- return dst_ncomp(instr);
- }
+ if (instr->type == IR2_FETCH) {
+ switch (instr->fetch.opc) {
+ case VTX_FETCH:
+ return 1;
+ case TEX_FETCH:
+ return instr->fetch.tex.is_cube ? 3 : 2;
+ case TEX_SET_TEX_LOD:
+ return 1;
+ default:
+ assert(0);
+ }
+ }
+
+ switch (instr->alu.scalar_opc) {
+ case PRED_SETEs ... KILLONEs:
+ return 1;
+ default:
+ break;
+ }
+
+ switch (instr->alu.vector_opc) {
+ case DOT2ADDv:
+ return 2;
+ case DOT3v:
+ return 3;
+ case DOT4v:
+ case CUBEv:
+ case PRED_SETE_PUSHv:
+ return 4;
+ default:
+ return dst_ncomp(instr);
+ }
}
#include "ir2_private.h"
/* if an instruction has side effects, we should never kill it */
-static bool has_side_effects(struct ir2_instr *instr)
+static bool
+has_side_effects(struct ir2_instr *instr)
{
- if (instr->type == IR2_CF)
- return true;
- else if (instr->type == IR2_FETCH)
- return false;
-
- switch (instr->alu.scalar_opc) {
- case PRED_SETEs ... KILLONEs:
- return true;
- default:
- break;
- }
-
- switch (instr->alu.vector_opc) {
- case PRED_SETE_PUSHv ... KILLNEv:
- return true;
- default:
- break;
- }
-
- return instr->alu.export >= 0;
+ if (instr->type == IR2_CF)
+ return true;
+ else if (instr->type == IR2_FETCH)
+ return false;
+
+ switch (instr->alu.scalar_opc) {
+ case PRED_SETEs ... KILLONEs:
+ return true;
+ default:
+ break;
+ }
+
+ switch (instr->alu.vector_opc) {
+ case PRED_SETE_PUSHv ... KILLNEv:
+ return true;
+ default:
+ break;
+ }
+
+ return instr->alu.export >= 0;
}
/* mark an instruction as required, and all its sources recursively */
-static void set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
+static void
+set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
{
- struct ir2_reg *reg;
-
- /* don't repeat work already done */
- if (instr->need_emit)
- return;
-
- instr->need_emit = true;
-
- ir2_foreach_src(src, instr) {
- switch (src->type) {
- case IR2_SRC_SSA:
- set_need_emit(ctx, &ctx->instr[src->num]);
- break;
- case IR2_SRC_REG:
- /* slow .. */
- reg = get_reg_src(ctx, src);
- ir2_foreach_instr(instr, ctx) {
- if (!instr->is_ssa && instr->reg == reg)
- set_need_emit(ctx, instr);
- }
- break;
- default:
- break;
- }
- }
+ struct ir2_reg *reg;
+
+ /* don't repeat work already done */
+ if (instr->need_emit)
+ return;
+
+ instr->need_emit = true;
+
+ ir2_foreach_src(src, instr)
+ {
+ switch (src->type) {
+ case IR2_SRC_SSA:
+ set_need_emit(ctx, &ctx->instr[src->num]);
+ break;
+ case IR2_SRC_REG:
+ /* slow .. */
+ reg = get_reg_src(ctx, src);
+ ir2_foreach_instr(instr, ctx)
+ {
+ if (!instr->is_ssa && instr->reg == reg)
+ set_need_emit(ctx, instr);
+ }
+ break;
+ default:
+ break;
+ }
+ }
}
/* get current bit mask of allocated components for a register */
-static unsigned reg_mask(struct ir2_context *ctx, unsigned idx)
+static unsigned
+reg_mask(struct ir2_context *ctx, unsigned idx)
{
- return ctx->reg_state[idx/8] >> idx%8*4 & 0xf;
+ return ctx->reg_state[idx / 8] >> idx % 8 * 4 & 0xf;
}
-static void reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
+static void
+reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
{
- idx = idx * 4 + c;
- ctx->reg_state[idx/32] |= 1 << idx%32;
+ idx = idx * 4 + c;
+ ctx->reg_state[idx / 32] |= 1 << idx % 32;
}
-static void reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
+static void
+reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
{
- idx = idx * 4 + c;
- ctx->reg_state[idx/32] &= ~(1 << idx%32);
+ idx = idx * 4 + c;
+ ctx->reg_state[idx / 32] &= ~(1 << idx % 32);
}
-void ra_count_refs(struct ir2_context *ctx)
+void
+ra_count_refs(struct ir2_context *ctx)
{
- struct ir2_reg *reg;
-
- /* mark instructions as needed
- * need to do this because "substitutions" pass makes many movs not needed
- */
- ir2_foreach_instr(instr, ctx) {
- if (has_side_effects(instr))
- set_need_emit(ctx, instr);
- }
-
- /* compute ref_counts */
- ir2_foreach_instr(instr, ctx) {
- /* kill non-needed so they can be skipped */
- if (!instr->need_emit) {
- instr->type = IR2_NONE;
- continue;
- }
-
- ir2_foreach_src(src, instr) {
- if (src->type == IR2_SRC_CONST)
- continue;
-
- reg = get_reg_src(ctx, src);
- for (int i = 0; i < src_ncomp(instr); i++)
- reg->comp[swiz_get(src->swizzle, i)].ref_count++;
- }
- }
+ struct ir2_reg *reg;
+
+ /* mark instructions as needed
+ * need to do this because "substitutions" pass makes many movs not needed
+ */
+ ir2_foreach_instr(instr, ctx)
+ {
+ if (has_side_effects(instr))
+ set_need_emit(ctx, instr);
+ }
+
+ /* compute ref_counts */
+ ir2_foreach_instr(instr, ctx)
+ {
+ /* kill non-needed so they can be skipped */
+ if (!instr->need_emit) {
+ instr->type = IR2_NONE;
+ continue;
+ }
+
+ ir2_foreach_src(src, instr)
+ {
+ if (src->type == IR2_SRC_CONST)
+ continue;
+
+ reg = get_reg_src(ctx, src);
+ for (int i = 0; i < src_ncomp(instr); i++)
+ reg->comp[swiz_get(src->swizzle, i)].ref_count++;
+ }
+ }
}
-void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
- bool export, uint8_t export_writemask)
+void
+ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, bool export,
+ uint8_t export_writemask)
{
- /* for export, don't allocate anything but set component layout */
- if (export) {
- for (int i = 0; i < 4; i++)
- reg->comp[i].c = i;
- return;
- }
-
- unsigned idx = force_idx;
-
- /* TODO: allocate into the same register if theres room
- * note: the blob doesn't do it, so verify that it is indeed better
- * also, doing it would conflict with scalar mov insertion
- */
-
- /* check if already allocated */
- for (int i = 0; i < reg->ncomp; i++) {
- if (reg->comp[i].alloc)
- return;
- }
-
- if (force_idx < 0) {
- for (idx = 0; idx < 64; idx++) {
- if (reg_mask(ctx, idx) == 0)
- break;
- }
- }
- assert(idx != 64); /* TODO ran out of register space.. */
-
- /* update max_reg value */
- ctx->info->max_reg = MAX2(ctx->info->max_reg, (int) idx);
-
- unsigned mask = reg_mask(ctx, idx);
-
- for (int i = 0; i < reg->ncomp; i++) {
- /* don't allocate never used values */
- if (reg->comp[i].ref_count == 0) {
- reg->comp[i].c = 7;
- continue;
- }
-
- /* TODO */
- unsigned c = 1 ? i : (ffs(~mask) - 1);
- mask |= 1 << c;
- reg->comp[i].c = c;
- reg_setmask(ctx, idx, c);
- reg->comp[i].alloc = true;
- }
-
- reg->idx = idx;
- ctx->live_regs[reg->idx] = reg;
+ /* for export, don't allocate anything but set component layout */
+ if (export) {
+ for (int i = 0; i < 4; i++)
+ reg->comp[i].c = i;
+ return;
+ }
+
+ unsigned idx = force_idx;
+
+ /* TODO: allocate into the same register if theres room
+ * note: the blob doesn't do it, so verify that it is indeed better
+ * also, doing it would conflict with scalar mov insertion
+ */
+
+ /* check if already allocated */
+ for (int i = 0; i < reg->ncomp; i++) {
+ if (reg->comp[i].alloc)
+ return;
+ }
+
+ if (force_idx < 0) {
+ for (idx = 0; idx < 64; idx++) {
+ if (reg_mask(ctx, idx) == 0)
+ break;
+ }
+ }
+ assert(idx != 64); /* TODO ran out of register space.. */
+
+ /* update max_reg value */
+ ctx->info->max_reg = MAX2(ctx->info->max_reg, (int)idx);
+
+ unsigned mask = reg_mask(ctx, idx);
+
+ for (int i = 0; i < reg->ncomp; i++) {
+ /* don't allocate never used values */
+ if (reg->comp[i].ref_count == 0) {
+ reg->comp[i].c = 7;
+ continue;
+ }
+
+ /* TODO */
+ unsigned c = 1 ? i : (ffs(~mask) - 1);
+ mask |= 1 << c;
+ reg->comp[i].c = c;
+ reg_setmask(ctx, idx, c);
+ reg->comp[i].alloc = true;
+ }
+
+ reg->idx = idx;
+ ctx->live_regs[reg->idx] = reg;
}
/* reduce srcs ref_count and free if needed */
-void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
+void
+ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
{
- struct ir2_reg *reg;
- struct ir2_reg_component *comp;
-
- ir2_foreach_src(src, instr) {
- if (src->type == IR2_SRC_CONST)
- continue;
-
- reg = get_reg_src(ctx, src);
- /* XXX use before write case */
-
- for (int i = 0; i < src_ncomp(instr); i++) {
- comp = &reg->comp[swiz_get(src->swizzle, i)];
- if (!--comp->ref_count && reg->block_idx_free < 0) {
- reg_freemask(ctx, reg->idx, comp->c);
- comp->alloc = false;
- }
- }
- }
+ struct ir2_reg *reg;
+ struct ir2_reg_component *comp;
+
+ ir2_foreach_src(src, instr)
+ {
+ if (src->type == IR2_SRC_CONST)
+ continue;
+
+ reg = get_reg_src(ctx, src);
+ /* XXX use before write case */
+
+ for (int i = 0; i < src_ncomp(instr); i++) {
+ comp = &reg->comp[swiz_get(src->swizzle, i)];
+ if (!--comp->ref_count && reg->block_idx_free < 0) {
+ reg_freemask(ctx, reg->idx, comp->c);
+ comp->alloc = false;
+ }
+ }
+ }
}
/* free any regs left for a block */
-void ra_block_free(struct ir2_context *ctx, unsigned block)
+void
+ra_block_free(struct ir2_context *ctx, unsigned block)
{
- ir2_foreach_live_reg(reg, ctx) {
- if (reg->block_idx_free != block)
- continue;
-
- for (int i = 0; i < reg->ncomp; i++) {
- if (!reg->comp[i].alloc) /* XXX should never be true? */
- continue;
-
- reg_freemask(ctx, reg->idx, reg->comp[i].c);
- reg->comp[i].alloc = false;
- }
- ctx->live_regs[reg->idx] = NULL;
- }
+ ir2_foreach_live_reg(reg, ctx)
+ {
+ if (reg->block_idx_free != block)
+ continue;
+
+ for (int i = 0; i < reg->ncomp; i++) {
+ if (!reg->comp[i].alloc) /* XXX should never be true? */
+ continue;
+
+ reg_freemask(ctx, reg->idx, reg->comp[i].c);
+ reg->comp[i].alloc = false;
+ }
+ ctx->live_regs[reg->idx] = NULL;
+ }
}
#include "pipe/p_state.h"
#include "util/u_blend.h"
#include "util/u_dual_blend.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
#include "fd3_blend.h"
#include "fd3_context.h"
#include "fd3_format.h"
-
static enum a3xx_rb_blend_opcode
blend_func(unsigned func)
{
- switch (func) {
- case PIPE_BLEND_ADD:
- return BLEND_DST_PLUS_SRC;
- case PIPE_BLEND_MIN:
- return BLEND_MIN_DST_SRC;
- case PIPE_BLEND_MAX:
- return BLEND_MAX_DST_SRC;
- case PIPE_BLEND_SUBTRACT:
- return BLEND_SRC_MINUS_DST;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return BLEND_DST_MINUS_SRC;
- default:
- DBG("invalid blend func: %x", func);
- return 0;
- }
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return BLEND_DST_PLUS_SRC;
+ case PIPE_BLEND_MIN:
+ return BLEND_MIN_DST_SRC;
+ case PIPE_BLEND_MAX:
+ return BLEND_MAX_DST_SRC;
+ case PIPE_BLEND_SUBTRACT:
+ return BLEND_SRC_MINUS_DST;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return BLEND_DST_MINUS_SRC;
+ default:
+ DBG("invalid blend func: %x", func);
+ return 0;
+ }
}
void *
fd3_blend_state_create(struct pipe_context *pctx,
- const struct pipe_blend_state *cso)
+ const struct pipe_blend_state *cso)
{
- struct fd3_blend_stateobj *so;
- enum a3xx_rop_code rop = ROP_COPY;
- bool reads_dest = false;
- int i;
-
- if (cso->logicop_enable) {
- rop = cso->logicop_func; /* maps 1:1 */
- reads_dest = util_logicop_reads_dest(cso->logicop_func);
- }
-
- so = CALLOC_STRUCT(fd3_blend_stateobj);
- if (!so)
- return NULL;
-
- so->base = *cso;
-
- for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
- const struct pipe_rt_blend_state *rt;
- if (cso->independent_blend_enable)
- rt = &cso->rt[i];
- else
- rt = &cso->rt[0];
-
- so->rb_mrt[i].blend_control =
- A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
- A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
- A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
- A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
- A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
- A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
-
- so->rb_mrt[i].control =
- A3XX_RB_MRT_CONTROL_ROP_CODE(rop) |
- A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
-
- if (rt->blend_enable)
- so->rb_mrt[i].control |=
- A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
- A3XX_RB_MRT_CONTROL_BLEND |
- A3XX_RB_MRT_CONTROL_BLEND2;
-
- if (reads_dest)
- so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
-
- if (cso->dither)
- so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
- }
-
- if (cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0))
- so->rb_render_control = A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE;
-
- return so;
+ struct fd3_blend_stateobj *so;
+ enum a3xx_rop_code rop = ROP_COPY;
+ bool reads_dest = false;
+ int i;
+
+ if (cso->logicop_enable) {
+ rop = cso->logicop_func; /* maps 1:1 */
+ reads_dest = util_logicop_reads_dest(cso->logicop_func);
+ }
+
+ so = CALLOC_STRUCT(fd3_blend_stateobj);
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
+ const struct pipe_rt_blend_state *rt;
+ if (cso->independent_blend_enable)
+ rt = &cso->rt[i];
+ else
+ rt = &cso->rt[0];
+
+ so->rb_mrt[i].blend_control =
+ A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(
+ fd_blend_factor(rt->rgb_src_factor)) |
+ A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
+ A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(
+ fd_blend_factor(rt->rgb_dst_factor)) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(
+ fd_blend_factor(rt->alpha_src_factor)) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(
+ blend_func(rt->alpha_func)) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(
+ fd_blend_factor(rt->alpha_dst_factor));
+
+ so->rb_mrt[i].control =
+ A3XX_RB_MRT_CONTROL_ROP_CODE(rop) |
+ A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
+
+ if (rt->blend_enable)
+ so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
+ A3XX_RB_MRT_CONTROL_BLEND |
+ A3XX_RB_MRT_CONTROL_BLEND2;
+
+ if (reads_dest)
+ so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
+
+ if (cso->dither)
+ so->rb_mrt[i].control |=
+ A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
+ }
+
+ if (cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0))
+ so->rb_render_control = A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE;
+
+ return so;
}
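
As a concrete reading of the mapping above, here is the typical src-over compositing state as the state tracker would hand it down (a sketch using standard gallium fields, not driver code):

struct pipe_blend_state cso = {0};
cso.rt[0].blend_enable = 1;
cso.rt[0].rgb_func = PIPE_BLEND_ADD;         /* -> BLEND_DST_PLUS_SRC */
cso.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
cso.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
cso.rt[0].alpha_func = PIPE_BLEND_ADD;
cso.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
cso.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
cso.rt[0].colormask = PIPE_MASK_RGBA;
/* fd3_blend_state_create() then encodes RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC)
 * with the two factors translated by fd_blend_factor(), and, because
 * blend_enable is set, ORs READ_DEST_ENABLE | BLEND | BLEND2 into
 * RB_MRT[0].CONTROL. */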
#ifndef FD3_BLEND_H_
#define FD3_BLEND_H_
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd3_blend_stateobj {
- struct pipe_blend_state base;
- uint32_t rb_render_control;
- struct {
- uint32_t blend_control;
- uint32_t control;
- } rb_mrt[A3XX_MAX_RENDER_TARGETS];
+ struct pipe_blend_state base;
+ uint32_t rb_render_control;
+ struct {
+ uint32_t blend_control;
+ uint32_t control;
+ } rb_mrt[A3XX_MAX_RENDER_TARGETS];
};
static inline struct fd3_blend_stateobj *
fd3_blend_stateobj(struct pipe_blend_state *blend)
{
- return (struct fd3_blend_stateobj *)blend;
+ return (struct fd3_blend_stateobj *)blend;
}
-void * fd3_blend_state_create(struct pipe_context *pctx,
- const struct pipe_blend_state *cso);
+void *fd3_blend_state_create(struct pipe_context *pctx,
+ const struct pipe_blend_state *cso);
#endif /* FD3_BLEND_H_ */
#include "freedreno_query_hw.h"
-#include "fd3_context.h"
#include "fd3_blend.h"
+#include "fd3_context.h"
#include "fd3_draw.h"
#include "fd3_emit.h"
#include "fd3_gmem.h"
#include "fd3_zsa.h"
static void
-fd3_context_destroy(struct pipe_context *pctx)
- in_dt
+fd3_context_destroy(struct pipe_context *pctx) in_dt
{
- struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx));
+ struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx));
- u_upload_destroy(fd3_ctx->border_color_uploader);
- pipe_resource_reference(&fd3_ctx->border_color_buf, NULL);
+ u_upload_destroy(fd3_ctx->border_color_uploader);
+ pipe_resource_reference(&fd3_ctx->border_color_buf, NULL);
- fd_context_destroy(pctx);
+ fd_context_destroy(pctx);
- fd_bo_del(fd3_ctx->vs_pvt_mem);
- fd_bo_del(fd3_ctx->fs_pvt_mem);
- fd_bo_del(fd3_ctx->vsc_size_mem);
+ fd_bo_del(fd3_ctx->vs_pvt_mem);
+ fd_bo_del(fd3_ctx->fs_pvt_mem);
+ fd_bo_del(fd3_ctx->vsc_size_mem);
- fd_context_cleanup_common_vbos(&fd3_ctx->base);
+ fd_context_cleanup_common_vbos(&fd3_ctx->base);
- fd_hw_query_fini(pctx);
+ fd_hw_query_fini(pctx);
- free(fd3_ctx);
+ free(fd3_ctx);
}
/* clang-format off */
/* clang-format on */
struct pipe_context *
-fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
- in_dt
+fd3_context_create(struct pipe_screen *pscreen, void *priv,
+ unsigned flags) in_dt
{
- struct fd_screen *screen = fd_screen(pscreen);
- struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context);
- struct pipe_context *pctx;
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context);
+ struct pipe_context *pctx;
- if (!fd3_ctx)
- return NULL;
+ if (!fd3_ctx)
+ return NULL;
- pctx = &fd3_ctx->base.base;
- pctx->screen = pscreen;
+ pctx = &fd3_ctx->base.base;
+ pctx->screen = pscreen;
- fd3_ctx->base.dev = fd_device_ref(screen->dev);
- fd3_ctx->base.screen = fd_screen(pscreen);
- fd3_ctx->base.last.key = &fd3_ctx->last_key;
+ fd3_ctx->base.dev = fd_device_ref(screen->dev);
+ fd3_ctx->base.screen = fd_screen(pscreen);
+ fd3_ctx->base.last.key = &fd3_ctx->last_key;
- pctx->destroy = fd3_context_destroy;
- pctx->create_blend_state = fd3_blend_state_create;
- pctx->create_rasterizer_state = fd3_rasterizer_state_create;
- pctx->create_depth_stencil_alpha_state = fd3_zsa_state_create;
+ pctx->destroy = fd3_context_destroy;
+ pctx->create_blend_state = fd3_blend_state_create;
+ pctx->create_rasterizer_state = fd3_rasterizer_state_create;
+ pctx->create_depth_stencil_alpha_state = fd3_zsa_state_create;
- fd3_draw_init(pctx);
- fd3_gmem_init(pctx);
- fd3_texture_init(pctx);
- fd3_prog_init(pctx);
- fd3_emit_init(pctx);
+ fd3_draw_init(pctx);
+ fd3_gmem_init(pctx);
+ fd3_texture_init(pctx);
+ fd3_prog_init(pctx);
+ fd3_emit_init(pctx);
- pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv, flags);
- if (!pctx)
- return NULL;
+ pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv, flags);
+ if (!pctx)
+ return NULL;
- fd_hw_query_init(pctx);
+ fd_hw_query_init(pctx);
- fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt");
+ fd3_ctx->vs_pvt_mem =
+ fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt");
- fd3_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt");
+ fd3_ctx->fs_pvt_mem =
+ fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt");
- fd3_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
+ fd3_ctx->vsc_size_mem =
+ fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
- fd_context_setup_common_vbos(&fd3_ctx->base);
+ fd_context_setup_common_vbos(&fd3_ctx->base);
- fd3_query_context_init(pctx);
+ fd3_query_context_init(pctx);
- fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
- PIPE_USAGE_STREAM, 0);
+ fd3_ctx->border_color_uploader =
+ u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
- return pctx;
+ return pctx;
}
#include "ir3/ir3_shader.h"
-
struct fd3_context {
- struct fd_context base;
+ struct fd_context base;
- struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
+ struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
- /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
- * could combine it with another allocation.
- */
- struct fd_bo *vsc_size_mem;
+ /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
+ * could combine it with another allocation.
+ */
+ struct fd_bo *vsc_size_mem;
- struct u_upload_mgr *border_color_uploader;
- struct pipe_resource *border_color_buf;
+ struct u_upload_mgr *border_color_uploader;
+ struct pipe_resource *border_color_buf;
- /* storage for ctx->last.key: */
- struct ir3_shader_key last_key;
+ /* storage for ctx->last.key: */
+ struct ir3_shader_key last_key;
};
static inline struct fd3_context *
fd3_context(struct fd_context *ctx)
{
- return (struct fd3_context *)ctx;
+ return (struct fd3_context *)ctx;
}
-struct pipe_context *
-fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
+struct pipe_context *fd3_context_create(struct pipe_screen *pscreen, void *priv,
+ unsigned flags);
#endif /* FD3_CONTEXT_H_ */
#include "pipe/p_state.h"
-#include "util/u_string.h"
+#include "util/format/u_format.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
-#include "util/format/u_format.h"
+#include "util/u_string.h"
-#include "freedreno_state.h"
#include "freedreno_resource.h"
+#include "freedreno_state.h"
-#include "fd3_draw.h"
#include "fd3_context.h"
+#include "fd3_draw.h"
#include "fd3_emit.h"
-#include "fd3_program.h"
#include "fd3_format.h"
+#include "fd3_program.h"
#include "fd3_zsa.h"
static inline uint32_t
add_sat(uint32_t a, int32_t b)
{
- int64_t ret = (uint64_t)a + (int64_t)b;
- if (ret > ~0U)
- return ~0U;
- if (ret < 0)
- return 0;
- return (uint32_t)ret;
+ int64_t ret = (uint64_t)a + (int64_t)b;
+ if (ret > ~0U)
+ return ~0U;
+ if (ret < 0)
+ return 0;
+ return (uint32_t)ret;
}
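
A few spot checks of the clamping behavior (assuming <assert.h>); the VFD_INDEX_MIN/MAX programming below relies on this not wrapping when index_bias is applied:

assert(add_sat(0xfffffffeu, 5) == 0xffffffffu); /* clamps at UINT32_MAX */
assert(add_sat(2u, -5) == 0u);                  /* clamps at zero */
assert(add_sat(10u, -3) == 7u);                 /* in range: exact sum */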
static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd3_emit *emit, unsigned index_offset)
- assert_dt
+ struct fd3_emit *emit, unsigned index_offset) assert_dt
{
- const struct pipe_draw_info *info = emit->info;
- enum pc_di_primtype primtype = ctx->primtypes[info->mode];
-
- fd3_emit_state(ctx, ring, emit);
-
- if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
- fd3_emit_vertex_bufs(ring, emit);
-
- OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
- OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
-
- OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
- OUT_RING(ring, info->index_bounds_valid ? add_sat(info->min_index, info->index_size ? info->index_bias : 0) : 0); /* VFD_INDEX_MIN */
- OUT_RING(ring, info->index_bounds_valid ? add_sat(info->max_index, info->index_size ? info->index_bias : 0) : ~0); /* VFD_INDEX_MAX */
- OUT_RING(ring, info->start_instance); /* VFD_INSTANCEID_OFFSET */
- OUT_RING(ring, info->index_size ? info->index_bias : emit->draw->start); /* VFD_INDEX_OFFSET */
-
- OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
- OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
- info->restart_index : 0xffffffff);
-
- /* points + psize -> spritelist: */
- if (ctx->rasterizer->point_size_per_vertex &&
- fd3_emit_get_vp(emit)->writes_psize &&
- (info->mode == PIPE_PRIM_POINTS))
- primtype = DI_PT_POINTLIST_PSIZE;
-
- fd_draw_emit(ctx->batch, ring, primtype,
- emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
- info, emit->draw, index_offset);
+ const struct pipe_draw_info *info = emit->info;
+ enum pc_di_primtype primtype = ctx->primtypes[info->mode];
+
+ fd3_emit_state(ctx, ring, emit);
+
+ if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
+ fd3_emit_vertex_bufs(ring, emit);
+
+ OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
+ OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
+
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, info->index_bounds_valid
+ ? add_sat(info->min_index,
+ info->index_size ? info->index_bias : 0)
+ : 0); /* VFD_INDEX_MIN */
+ OUT_RING(ring, info->index_bounds_valid
+ ? add_sat(info->max_index,
+ info->index_size ? info->index_bias : 0)
+ : ~0); /* VFD_INDEX_MAX */
+ OUT_RING(ring, info->start_instance); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, info->index_size ? info->index_bias
+ : emit->draw->start); /* VFD_INDEX_OFFSET */
+
+ OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
+ info->restart_index
+ : 0xffffffff);
+
+ /* points + psize -> spritelist: */
+ if (ctx->rasterizer->point_size_per_vertex &&
+ fd3_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS))
+ primtype = DI_PT_POINTLIST_PSIZE;
+
+ fd_draw_emit(ctx->batch, ring, primtype,
+ emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info,
+ emit->draw, index_offset);
}
static bool
fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw,
- unsigned index_offset)
- in_dt
+ unsigned index_offset) in_dt
{
- struct fd3_emit emit = {
- .debug = &ctx->debug,
- .vtx = &ctx->vtx,
- .info = info,
- .indirect = indirect,
- .draw = draw,
- .key = {
- .vs = ctx->prog.vs,
- .fs = ctx->prog.fs,
- },
- .rasterflat = ctx->rasterizer->flatshade,
- .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
- .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
- };
-
- if (info->mode != PIPE_PRIM_MAX &&
- !indirect &&
- !info->primitive_restart &&
- !u_trim_pipe_prim(info->mode, (unsigned*)&draw->count))
- return false;
-
- if (fd3_needs_manual_clipping(ir3_get_shader(ctx->prog.vs), ctx->rasterizer))
- emit.key.key.ucp_enables = ctx->rasterizer->clip_plane_enable;
-
- ir3_fixup_shader_state(&ctx->base, &emit.key.key);
-
- unsigned dirty = ctx->dirty;
-
- emit.prog = fd3_program_state(ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
-
- /* bail if compile failed: */
- if (!emit.prog)
- return false;
-
- const struct ir3_shader_variant *vp = fd3_emit_get_vp(&emit);
- const struct ir3_shader_variant *fp = fd3_emit_get_fp(&emit);
-
- ir3_update_max_tf_vtx(ctx, vp);
-
- /* do regular pass first: */
-
- if (unlikely(ctx->stats_users > 0)) {
- ctx->stats.vs_regs += ir3_shader_halfregs(vp);
- ctx->stats.fs_regs += ir3_shader_halfregs(fp);
- }
-
- emit.binning_pass = false;
- emit.dirty = dirty;
- draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
-
- /* and now binning pass: */
- emit.binning_pass = true;
- emit.dirty = dirty & ~(FD_DIRTY_BLEND);
- emit.vs = NULL; /* we changed key so need to refetch vs */
- emit.fs = NULL;
- draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
-
- fd_context_all_clean(ctx);
-
- return true;
+ struct fd3_emit emit = {
+ .debug = &ctx->debug,
+ .vtx = &ctx->vtx,
+ .info = info,
+ .indirect = indirect,
+ .draw = draw,
+ .key =
+ {
+ .vs = ctx->prog.vs,
+ .fs = ctx->prog.fs,
+ },
+ .rasterflat = ctx->rasterizer->flatshade,
+ .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
+ .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
+ };
+
+ if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
+ !u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
+ return false;
+
+ if (fd3_needs_manual_clipping(ir3_get_shader(ctx->prog.vs), ctx->rasterizer))
+ emit.key.key.ucp_enables = ctx->rasterizer->clip_plane_enable;
+
+ ir3_fixup_shader_state(&ctx->base, &emit.key.key);
+
+ unsigned dirty = ctx->dirty;
+
+ emit.prog = fd3_program_state(
+ ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
+
+ /* bail if compile failed: */
+ if (!emit.prog)
+ return false;
+
+ const struct ir3_shader_variant *vp = fd3_emit_get_vp(&emit);
+ const struct ir3_shader_variant *fp = fd3_emit_get_fp(&emit);
+
+ ir3_update_max_tf_vtx(ctx, vp);
+
+ /* do regular pass first: */
+
+ if (unlikely(ctx->stats_users > 0)) {
+ ctx->stats.vs_regs += ir3_shader_halfregs(vp);
+ ctx->stats.fs_regs += ir3_shader_halfregs(fp);
+ }
+
+ emit.binning_pass = false;
+ emit.dirty = dirty;
+ draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
+
+ /* and now binning pass: */
+ emit.binning_pass = true;
+ emit.dirty = dirty & ~(FD_DIRTY_BLEND);
+ emit.vs = NULL; /* we changed key so need to refetch vs */
+ emit.fs = NULL;
+ draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
+
+ fd_context_all_clean(ctx);
+
+ return true;
}
void
-fd3_draw_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd3_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->draw_vbo = fd3_draw_vbo;
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->draw_vbo = fd3_draw_vbo;
}
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_helpers.h"
#include "util/format/u_format.h"
+#include "util/u_helpers.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "util/u_viewport.h"
-#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
+#include "freedreno_resource.h"
-#include "fd3_emit.h"
#include "fd3_blend.h"
#include "fd3_context.h"
+#include "fd3_emit.h"
+#include "fd3_format.h"
#include "fd3_program.h"
#include "fd3_rasterizer.h"
#include "fd3_texture.h"
-#include "fd3_format.h"
#include "fd3_zsa.h"
#define emit_const_user fd3_emit_const_user
-#define emit_const_bo fd3_emit_const_bo
+#define emit_const_bo fd3_emit_const_bo
#include "ir3_const.h"
static const enum adreno_state_block sb[] = {
- [MESA_SHADER_VERTEX] = SB_VERT_SHADER,
- [MESA_SHADER_FRAGMENT] = SB_FRAG_SHADER,
+ [MESA_SHADER_VERTEX] = SB_VERT_SHADER,
+ [MESA_SHADER_FRAGMENT] = SB_FRAG_SHADER,
};
/* regid: base const register
*/
static void
fd3_emit_const_user(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v,
- uint32_t regid, uint32_t sizedwords, const uint32_t *dwords)
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t sizedwords, const uint32_t *dwords)
{
- emit_const_asserts(ring, v, regid, sizedwords);
-
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + sizedwords);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb[v->type]) |
- CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
- OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
- for (int i = 0; i < sizedwords; i++)
- OUT_RING(ring, dwords[i]);
+ emit_const_asserts(ring, v, regid, sizedwords);
+
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + sizedwords);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid / 2) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[v->type]) |
+ CP_LOAD_STATE_0_NUM_UNIT(sizedwords / 2));
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+ for (int i = 0; i < sizedwords; i++)
+ OUT_RING(ring, dwords[i]);
}
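
/* Worked example of the header math above (a sketch; the unit scaling is
 * inferred from the /2 factors, not from hardware documentation):
 * uploading sizedwords = 16 user constants at regid = 8 emits a packet of
 * 2 + 16 dwords whose header encodes DST_OFF = 8/2 = 4 and
 * NUM_UNIT = 16/2 = 8, i.e. both destination offset and unit count appear
 * to be expressed in two-dword units, with the payload following inline
 * (SS_DIRECT).
 */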
static void
-fd3_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
- uint32_t regid, uint32_t offset, uint32_t sizedwords,
- struct fd_bo *bo)
+fd3_emit_const_bo(struct fd_ringbuffer *ring,
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
- uint32_t dst_off = regid / 2;
- /* The blob driver aligns all const uploads dst_off to 64. We've been
- * successfully aligning to 8 vec4s as const_upload_unit so far with no
- * ill effects.
- */
- assert(dst_off % 16 == 0);
- uint32_t num_unit = sizedwords / 2;
- assert(num_unit % 2 == 0);
-
- emit_const_asserts(ring, v, regid, sizedwords);
-
- OUT_PKT3(ring, CP_LOAD_STATE, 2);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(dst_off) |
- CP_LOAD_STATE_0_STATE_SRC(SS_INDIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb[v->type]) |
- CP_LOAD_STATE_0_NUM_UNIT(num_unit));
- OUT_RELOC(ring, bo, offset,
- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
+ uint32_t dst_off = regid / 2;
+ /* The blob driver aligns all const uploads dst_off to 64. We've been
+ * successfully aligning to 8 vec4s as const_upload_unit so far with no
+ * ill effects.
+ */
+ assert(dst_off % 16 == 0);
+ uint32_t num_unit = sizedwords / 2;
+ assert(num_unit % 2 == 0);
+
+ emit_const_asserts(ring, v, regid, sizedwords);
+
+ OUT_PKT3(ring, CP_LOAD_STATE, 2);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(dst_off) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_INDIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[v->type]) |
+ CP_LOAD_STATE_0_NUM_UNIT(num_unit));
+ OUT_RELOC(ring, bo, offset, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
}
static void
fd3_emit_const_ptrs(struct fd_ringbuffer *ring, gl_shader_stage type,
- uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
+ uint32_t regid, uint32_t num, struct fd_bo **bos,
+ uint32_t *offsets)
{
- uint32_t anum = align(num, 4);
- uint32_t i;
-
- debug_assert((regid % 4) == 0);
-
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
- CP_LOAD_STATE_0_NUM_UNIT(anum/2));
- OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
- for (i = 0; i < num; i++) {
- if (bos[i]) {
- OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
- } else {
- OUT_RING(ring, 0xbad00000 | (i << 16));
- }
- }
-
- for (; i < anum; i++)
- OUT_RING(ring, 0xffffffff);
+ uint32_t anum = align(num, 4);
+ uint32_t i;
+
+ debug_assert((regid % 4) == 0);
+
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid / 2) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+ CP_LOAD_STATE_0_NUM_UNIT(anum / 2));
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+ for (i = 0; i < num; i++) {
+ if (bos[i]) {
+ OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+ } else {
+ OUT_RING(ring, 0xbad00000 | (i << 16));
+ }
+ }
+
+ for (; i < anum; i++)
+ OUT_RING(ring, 0xffffffff);
}
static bool
is_stateobj(struct fd_ringbuffer *ring)
{
- return false;
+ return false;
}
static void
-emit_const_ptrs(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t dst_offset,
- uint32_t num, struct fd_bo **bos, uint32_t *offsets)
+emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
+ uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
+ uint32_t *offsets)
{
- /* TODO inline this */
- assert(dst_offset + num <= v->constlen * 4);
- fd3_emit_const_ptrs(ring, v->type, dst_offset, num, bos, offsets);
+ /* TODO inline this */
+ assert(dst_offset + num <= v->constlen * 4);
+ fd3_emit_const_ptrs(ring, v->type, dst_offset, num, bos, offsets);
}
-#define VERT_TEX_OFF 0
-#define FRAG_TEX_OFF 16
-#define BASETABLE_SZ A3XX_MAX_MIP_LEVELS
+#define VERT_TEX_OFF 0
+#define FRAG_TEX_OFF 16
+#define BASETABLE_SZ A3XX_MAX_MIP_LEVELS
static void
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum adreno_state_block sb, struct fd_texture_stateobj *tex)
+ enum adreno_state_block sb, struct fd_texture_stateobj *tex)
{
- static const unsigned tex_off[] = {
- [SB_VERT_TEX] = VERT_TEX_OFF,
- [SB_FRAG_TEX] = FRAG_TEX_OFF,
- };
- static const enum adreno_state_block mipaddr[] = {
- [SB_VERT_TEX] = SB_VERT_MIPADDR,
- [SB_FRAG_TEX] = SB_FRAG_MIPADDR,
- };
- static const uint32_t bcolor_reg[] = {
- [SB_VERT_TEX] = REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
- [SB_FRAG_TEX] = REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
- };
- struct fd3_context *fd3_ctx = fd3_context(ctx);
- bool needs_border = false;
- unsigned i, j;
-
- if (tex->num_samplers > 0) {
- /* output sampler state: */
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
- OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
- CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- for (i = 0; i < tex->num_samplers; i++) {
- static const struct fd3_sampler_stateobj dummy_sampler = {};
- const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
- fd3_sampler_stateobj(tex->samplers[i]) :
- &dummy_sampler;
-
- OUT_RING(ring, sampler->texsamp0);
- OUT_RING(ring, sampler->texsamp1);
-
- needs_border |= sampler->needs_border;
- }
- }
-
- if (tex->num_textures > 0) {
- /* emit texture state: */
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + (4 * tex->num_textures));
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
- OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
- CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- for (i = 0; i < tex->num_textures; i++) {
- static const struct fd3_pipe_sampler_view dummy_view = {};
- const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
- fd3_pipe_sampler_view(tex->textures[i]) :
- &dummy_view;
- OUT_RING(ring, view->texconst0);
- OUT_RING(ring, view->texconst1);
- OUT_RING(ring, view->texconst2 |
- A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
- OUT_RING(ring, view->texconst3);
- }
-
- /* emit mipaddrs: */
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + (BASETABLE_SZ * tex->num_textures));
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * tex_off[sb]) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(mipaddr[sb]) |
- CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * tex->num_textures));
- OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
- CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- for (i = 0; i < tex->num_textures; i++) {
- static const struct fd3_pipe_sampler_view dummy_view = {
- .base.target = PIPE_TEXTURE_1D, /* anything !PIPE_BUFFER */
- .base.u.tex.first_level = 1,
- };
- const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
- fd3_pipe_sampler_view(tex->textures[i]) :
- &dummy_view;
- struct fd_resource *rsc = fd_resource(view->base.texture);
- if (rsc && rsc->b.b.target == PIPE_BUFFER) {
- OUT_RELOC(ring, rsc->bo, view->base.u.buf.offset, 0, 0);
- j = 1;
- } else {
- unsigned start = fd_sampler_first_level(&view->base);
- unsigned end = fd_sampler_last_level(&view->base);
-
- for (j = 0; j < (end - start + 1); j++) {
- struct fdl_slice *slice = fd_resource_slice(rsc, j + start);
- OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
- }
- }
-
- /* pad the remaining entries w/ null: */
- for (; j < BASETABLE_SZ; j++) {
- OUT_RING(ring, 0x00000000);
- }
- }
- }
-
- if (needs_border) {
- unsigned off;
- void *ptr;
-
- u_upload_alloc(fd3_ctx->border_color_uploader,
- 0, BORDER_COLOR_UPLOAD_SIZE,
- BORDER_COLOR_UPLOAD_SIZE, &off,
- &fd3_ctx->border_color_buf,
- &ptr);
-
- fd_setup_border_colors(tex, ptr, tex_off[sb]);
-
- OUT_PKT0(ring, bcolor_reg[sb], 1);
- OUT_RELOC(ring, fd_resource(fd3_ctx->border_color_buf)->bo, off, 0, 0);
-
- u_upload_unmap(fd3_ctx->border_color_uploader);
- }
+ static const unsigned tex_off[] = {
+ [SB_VERT_TEX] = VERT_TEX_OFF,
+ [SB_FRAG_TEX] = FRAG_TEX_OFF,
+ };
+ static const enum adreno_state_block mipaddr[] = {
+ [SB_VERT_TEX] = SB_VERT_MIPADDR,
+ [SB_FRAG_TEX] = SB_FRAG_MIPADDR,
+ };
+ static const uint32_t bcolor_reg[] = {
+ [SB_VERT_TEX] = REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
+ [SB_FRAG_TEX] = REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
+ };
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ bool needs_border = false;
+ unsigned i, j;
+
+ if (tex->num_samplers > 0) {
+ /* output sampler state: */
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
+ OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+ CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < tex->num_samplers; i++) {
+ static const struct fd3_sampler_stateobj dummy_sampler = {};
+ const struct fd3_sampler_stateobj *sampler =
+ tex->samplers[i] ? fd3_sampler_stateobj(tex->samplers[i])
+ : &dummy_sampler;
+
+ OUT_RING(ring, sampler->texsamp0);
+ OUT_RING(ring, sampler->texsamp1);
+
+ needs_border |= sampler->needs_border;
+ }
+ }
+
+ if (tex->num_textures > 0) {
+ /* emit texture state: */
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + (4 * tex->num_textures));
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
+ OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+ CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < tex->num_textures; i++) {
+ static const struct fd3_pipe_sampler_view dummy_view = {};
+ const struct fd3_pipe_sampler_view *view =
+ tex->textures[i] ? fd3_pipe_sampler_view(tex->textures[i])
+ : &dummy_view;
+ OUT_RING(ring, view->texconst0);
+ OUT_RING(ring, view->texconst1);
+ OUT_RING(ring,
+ view->texconst2 | A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
+ OUT_RING(ring, view->texconst3);
+ }
+
+ /* emit mipaddrs: */
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + (BASETABLE_SZ * tex->num_textures));
+ OUT_RING(ring,
+ CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * tex_off[sb]) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(mipaddr[sb]) |
+ CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * tex->num_textures));
+ OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+ CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < tex->num_textures; i++) {
+ static const struct fd3_pipe_sampler_view dummy_view = {
+ .base.target = PIPE_TEXTURE_1D, /* anything !PIPE_BUFFER */
+ .base.u.tex.first_level = 1,
+ };
+ const struct fd3_pipe_sampler_view *view =
+ tex->textures[i] ? fd3_pipe_sampler_view(tex->textures[i])
+ : &dummy_view;
+ struct fd_resource *rsc = fd_resource(view->base.texture);
+ if (rsc && rsc->b.b.target == PIPE_BUFFER) {
+ OUT_RELOC(ring, rsc->bo, view->base.u.buf.offset, 0, 0);
+ j = 1;
+ } else {
+ unsigned start = fd_sampler_first_level(&view->base);
+ unsigned end = fd_sampler_last_level(&view->base);
+
+ for (j = 0; j < (end - start + 1); j++) {
+ struct fdl_slice *slice = fd_resource_slice(rsc, j + start);
+ OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+ }
+ }
+
+ /* pad the remaining entries w/ null: */
+ for (; j < BASETABLE_SZ; j++) {
+ OUT_RING(ring, 0x00000000);
+ }
+ }
+ }
+
+ if (needs_border) {
+ unsigned off;
+ void *ptr;
+
+ u_upload_alloc(fd3_ctx->border_color_uploader, 0,
+ BORDER_COLOR_UPLOAD_SIZE, BORDER_COLOR_UPLOAD_SIZE, &off,
+ &fd3_ctx->border_color_buf, &ptr);
+
+ fd_setup_border_colors(tex, ptr, tex_off[sb]);
+
+ OUT_PKT0(ring, bcolor_reg[sb], 1);
+ OUT_RELOC(ring, fd_resource(fd3_ctx->border_color_buf)->bo, off, 0, 0);
+
+ u_upload_unmap(fd3_ctx->border_color_uploader);
+ }
}
/* emit texture state for mem->gmem restore operation.. eventually it would
*/
void
fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
- struct pipe_surface **psurf,
- int bufs)
+ struct pipe_surface **psurf, int bufs)
{
- int i, j;
-
- /* output sampler state: */
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 * bufs);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
- CP_LOAD_STATE_0_NUM_UNIT(bufs));
- OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
- CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- for (i = 0; i < bufs; i++) {
- OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
- A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
- A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
- A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
- A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
- OUT_RING(ring, 0x00000000);
- }
-
- /* emit texture state: */
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + 4 * bufs);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
- CP_LOAD_STATE_0_NUM_UNIT(bufs));
- OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
- CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- for (i = 0; i < bufs; i++) {
- if (!psurf[i]) {
- OUT_RING(ring, A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
- A3XX_TEX_CONST_0_SWIZ_X(A3XX_TEX_ONE) |
- A3XX_TEX_CONST_0_SWIZ_Y(A3XX_TEX_ONE) |
- A3XX_TEX_CONST_0_SWIZ_Z(A3XX_TEX_ONE) |
- A3XX_TEX_CONST_0_SWIZ_W(A3XX_TEX_ONE));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
- OUT_RING(ring, 0x00000000);
- continue;
- }
-
- struct fd_resource *rsc = fd_resource(psurf[i]->texture);
- enum pipe_format format = fd_gmem_restore_format(psurf[i]->format);
- /* The restore blit_zs shader expects stencil in sampler 0, and depth
- * in sampler 1
- */
- if (rsc->stencil && i == 0) {
- rsc = rsc->stencil;
- format = fd_gmem_restore_format(rsc->b.b.format);
- }
-
- /* note: PIPE_BUFFER disallowed for surfaces */
- unsigned lvl = psurf[i]->u.tex.level;
-
- debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer);
-
- OUT_RING(ring, A3XX_TEX_CONST_0_TILE_MODE(rsc->layout.tile_mode) |
- A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) |
- A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
- fd3_tex_swiz(format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
- PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
- OUT_RING(ring, A3XX_TEX_CONST_1_WIDTH(psurf[i]->width) |
- A3XX_TEX_CONST_1_HEIGHT(psurf[i]->height));
- OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)) |
- A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
- OUT_RING(ring, 0x00000000);
- }
-
- /* emit mipaddrs: */
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + BASETABLE_SZ * bufs);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) |
- CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * bufs));
- OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
- CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- for (i = 0; i < bufs; i++) {
- if (psurf[i]) {
- struct fd_resource *rsc = fd_resource(psurf[i]->texture);
- /* Matches above logic for blit_zs shader */
- if (rsc->stencil && i == 0)
- rsc = rsc->stencil;
- unsigned lvl = psurf[i]->u.tex.level;
- uint32_t offset = fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer);
- OUT_RELOC(ring, rsc->bo, offset, 0, 0);
- } else {
- OUT_RING(ring, 0x00000000);
- }
-
- /* pad the remaining entries w/ null: */
- for (j = 1; j < BASETABLE_SZ; j++) {
- OUT_RING(ring, 0x00000000);
- }
- }
+ int i, j;
+
+ /* output sampler state: */
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 * bufs);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
+ CP_LOAD_STATE_0_NUM_UNIT(bufs));
+ OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+ CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < bufs; i++) {
+ OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
+ A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
+ A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
+ A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
+ A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
+ OUT_RING(ring, 0x00000000);
+ }
+
+ /* emit texture state: */
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + 4 * bufs);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
+ CP_LOAD_STATE_0_NUM_UNIT(bufs));
+ OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+ CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < bufs; i++) {
+ if (!psurf[i]) {
+ OUT_RING(ring, A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
+ A3XX_TEX_CONST_0_SWIZ_X(A3XX_TEX_ONE) |
+ A3XX_TEX_CONST_0_SWIZ_Y(A3XX_TEX_ONE) |
+ A3XX_TEX_CONST_0_SWIZ_Z(A3XX_TEX_ONE) |
+ A3XX_TEX_CONST_0_SWIZ_W(A3XX_TEX_ONE));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
+ OUT_RING(ring, 0x00000000);
+ continue;
+ }
+
+ struct fd_resource *rsc = fd_resource(psurf[i]->texture);
+ enum pipe_format format = fd_gmem_restore_format(psurf[i]->format);
+ /* The restore blit_zs shader expects stencil in sampler 0, and depth
+ * in sampler 1
+ */
+ if (rsc->stencil && i == 0) {
+ rsc = rsc->stencil;
+ format = fd_gmem_restore_format(rsc->b.b.format);
+ }
+
+ /* note: PIPE_BUFFER disallowed for surfaces */
+ unsigned lvl = psurf[i]->u.tex.level;
+
+ debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer);
+
+ OUT_RING(ring, A3XX_TEX_CONST_0_TILE_MODE(rsc->layout.tile_mode) |
+ A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) |
+ A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
+ fd3_tex_swiz(format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
+ OUT_RING(ring, A3XX_TEX_CONST_1_WIDTH(psurf[i]->width) |
+ A3XX_TEX_CONST_1_HEIGHT(psurf[i]->height));
+ OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)) |
+ A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
+ OUT_RING(ring, 0x00000000);
+ }
+
+ /* emit mipaddrs: */
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + BASETABLE_SZ * bufs);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) |
+ CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * bufs));
+ OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+ CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < bufs; i++) {
+ if (psurf[i]) {
+ struct fd_resource *rsc = fd_resource(psurf[i]->texture);
+ /* Matches above logic for blit_zs shader */
+ if (rsc->stencil && i == 0)
+ rsc = rsc->stencil;
+ unsigned lvl = psurf[i]->u.tex.level;
+ uint32_t offset =
+ fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer);
+ OUT_RELOC(ring, rsc->bo, offset, 0, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ }
+
+ /* pad the remaining entries w/ null: */
+ for (j = 1; j < BASETABLE_SZ; j++) {
+ OUT_RING(ring, 0x00000000);
+ }
+ }
}
void
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
{
- int32_t i, j, last = -1;
- uint32_t total_in = 0;
- const struct fd_vertex_state *vtx = emit->vtx;
- const struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
- unsigned vertex_regid = regid(63, 0);
- unsigned instance_regid = regid(63, 0);
- unsigned vtxcnt_regid = regid(63, 0);
-
- /* Note that sysvals come *after* normal inputs: */
- for (i = 0; i < vp->inputs_count; i++) {
- if (!vp->inputs[i].compmask)
- continue;
- if (vp->inputs[i].sysval) {
- switch(vp->inputs[i].slot) {
- case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
- vertex_regid = vp->inputs[i].regid;
- break;
- case SYSTEM_VALUE_INSTANCE_ID:
- instance_regid = vp->inputs[i].regid;
- break;
- case SYSTEM_VALUE_VERTEX_CNT:
- vtxcnt_regid = vp->inputs[i].regid;
- break;
- default:
- unreachable("invalid system value");
- break;
- }
- } else if (i < vtx->vtx->num_elements) {
- last = i;
- }
- }
-
- for (i = 0, j = 0; i <= last; i++) {
- assert(!vp->inputs[i].sysval);
- if (vp->inputs[i].compmask) {
- struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
- const struct pipe_vertex_buffer *vb =
- &vtx->vertexbuf.vb[elem->vertex_buffer_index];
- struct fd_resource *rsc = fd_resource(vb->buffer.resource);
- enum pipe_format pfmt = elem->src_format;
- enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
- bool switchnext = (i != last) ||
- (vertex_regid != regid(63, 0)) ||
- (instance_regid != regid(63, 0)) ||
- (vtxcnt_regid != regid(63, 0));
- bool isint = util_format_is_pure_integer(pfmt);
- uint32_t off = vb->buffer_offset + elem->src_offset;
- uint32_t fs = util_format_get_blocksize(pfmt);
+ int32_t i, j, last = -1;
+ uint32_t total_in = 0;
+ const struct fd_vertex_state *vtx = emit->vtx;
+ const struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
+ unsigned vertex_regid = regid(63, 0);
+ unsigned instance_regid = regid(63, 0);
+ unsigned vtxcnt_regid = regid(63, 0);
+
+ /* Note that sysvals come *after* normal inputs: */
+ for (i = 0; i < vp->inputs_count; i++) {
+ if (!vp->inputs[i].compmask)
+ continue;
+ if (vp->inputs[i].sysval) {
+ switch (vp->inputs[i].slot) {
+ case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+ vertex_regid = vp->inputs[i].regid;
+ break;
+ case SYSTEM_VALUE_INSTANCE_ID:
+ instance_regid = vp->inputs[i].regid;
+ break;
+ case SYSTEM_VALUE_VERTEX_CNT:
+ vtxcnt_regid = vp->inputs[i].regid;
+ break;
+ default:
+ unreachable("invalid system value");
+ break;
+ }
+ } else if (i < vtx->vtx->num_elements) {
+ last = i;
+ }
+ }
+
+ for (i = 0, j = 0; i <= last; i++) {
+ assert(!vp->inputs[i].sysval);
+ if (vp->inputs[i].compmask) {
+ struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
+ const struct pipe_vertex_buffer *vb =
+ &vtx->vertexbuf.vb[elem->vertex_buffer_index];
+ struct fd_resource *rsc = fd_resource(vb->buffer.resource);
+ enum pipe_format pfmt = elem->src_format;
+ enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
+ bool switchnext = (i != last) || (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
+ bool isint = util_format_is_pure_integer(pfmt);
+ uint32_t off = vb->buffer_offset + elem->src_offset;
+ uint32_t fs = util_format_get_blocksize(pfmt);
#ifdef DEBUG
- /* see dEQP-GLES31.stress.vertex_attribute_binding.buffer_bounds.bind_vertex_buffer_offset_near_wrap_10
- * should mesa/st be protecting us from this?
- */
- if (off > fd_bo_size(rsc->bo))
- continue;
+ /* see
+ * dEQP-GLES31.stress.vertex_attribute_binding.buffer_bounds.bind_vertex_buffer_offset_near_wrap_10
+ * should mesa/st be protecting us from this?
+ */
+ if (off > fd_bo_size(rsc->bo))
+ continue;
#endif
- debug_assert(fmt != VFMT_NONE);
-
- OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2);
- OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
- A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
- COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
- A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) |
- COND(elem->instance_divisor, A3XX_VFD_FETCH_INSTR_0_INSTANCED) |
- A3XX_VFD_FETCH_INSTR_0_STEPRATE(MAX2(1, elem->instance_divisor)));
- OUT_RELOC(ring, rsc->bo, off, 0, 0);
-
- OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(j), 1);
- OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
- A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
- A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
- A3XX_VFD_DECODE_INSTR_SWAP(fd3_pipe2swap(pfmt)) |
- A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
- A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
- A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
- COND(isint, A3XX_VFD_DECODE_INSTR_INT) |
- COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
-
- total_in += util_bitcount(vp->inputs[i].compmask);
- j++;
- }
- }
-
- /* hw doesn't like to be configured for zero vbo's, it seems: */
- if (last < 0) {
- /* just recycle the shader bo, we just need to point to *something*
- * valid:
- */
- struct fd_bo *dummy_vbo = vp->bo;
- bool switchnext = (vertex_regid != regid(63, 0)) ||
- (instance_regid != regid(63, 0)) ||
- (vtxcnt_regid != regid(63, 0));
-
- OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2);
- OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
- A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
- COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
- A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) |
- A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
- OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
-
- OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1);
- OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
- A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
- A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) |
- A3XX_VFD_DECODE_INSTR_SWAP(XYZW) |
- A3XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
- A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
- A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
- COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
-
- total_in = 1;
- j = 1;
- }
-
- OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
- OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
- A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
- A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
- A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
- OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
- A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
- A3XX_VFD_CONTROL_1_REGID4INST(instance_regid));
-
- OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
- OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
- A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(vtxcnt_regid));
+ debug_assert(fmt != VFMT_NONE);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2);
+ OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
+ A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
+ COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+ A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) |
+ COND(elem->instance_divisor,
+ A3XX_VFD_FETCH_INSTR_0_INSTANCED) |
+ A3XX_VFD_FETCH_INSTR_0_STEPRATE(
+ MAX2(1, elem->instance_divisor)));
+ OUT_RELOC(ring, rsc->bo, off, 0, 0);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(j), 1);
+ OUT_RING(ring,
+ A3XX_VFD_DECODE_INSTR_CONSTFILL |
+ A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
+ A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+ A3XX_VFD_DECODE_INSTR_SWAP(fd3_pipe2swap(pfmt)) |
+ A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
+ A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
+ A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(isint, A3XX_VFD_DECODE_INSTR_INT) |
+ COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in += util_bitcount(vp->inputs[i].compmask);
+ j++;
+ }
+ }
+
+ /* hw doesn't like to be configured for zero vbo's, it seems: */
+ if (last < 0) {
+ /* just recycle the shader bo, we just need to point to *something*
+ * valid:
+ */
+ struct fd_bo *dummy_vbo = vp->bo;
+ bool switchnext = (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2);
+ OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+ A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+ COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+ A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) |
+ A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+ OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1);
+ OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+ A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+ A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) |
+ A3XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+ A3XX_VFD_DECODE_INSTR_REGID(regid(0, 0)) |
+ A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+ A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in = 1;
+ j = 1;
+ }
+
+ OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
+ OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
+ A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
+ A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
+ A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
+ OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
+ A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
+ A3XX_VFD_CONTROL_1_REGID4INST(instance_regid));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
+ OUT_RING(ring,
+ A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
+ A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(vtxcnt_regid));
}
void
fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd3_emit *emit)
+ struct fd3_emit *emit)
{
- const struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
- const struct ir3_shader_variant *fp = fd3_emit_get_fp(emit);
- const enum fd_dirty_3d_state dirty = emit->dirty;
-
- emit_marker(ring, 5);
-
- if (dirty & FD_DIRTY_SAMPLE_MASK) {
- OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
- A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
- A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
- }
-
- if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL)) &&
- !emit->binning_pass) {
- uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control |
- fd3_blend_stateobj(ctx->blend)->rb_render_control;
-
- val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
- val |= COND(fp->fragcoord_compmask != 0,
- A3XX_RB_RENDER_CONTROL_COORD_MASK(fp->fragcoord_compmask));
- val |= COND(ctx->rasterizer->rasterizer_discard,
- A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
-
- /* I suppose if we needed to (which I don't *think* we need
- * to), we could emit this for binning pass too. But we
- * would need to keep a different patch-list for binning
- * vs render pass.
- */
-
- OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
- OUT_RINGP(ring, val, &ctx->batch->rbrc_patches);
- }
-
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
- struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
- struct pipe_stencil_ref *sr = &ctx->stencil_ref;
-
- OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
- OUT_RING(ring, zsa->rb_alpha_ref);
-
- OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
- OUT_RING(ring, zsa->rb_stencil_control);
-
- OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
- OUT_RING(ring, zsa->rb_stencilrefmask |
- A3XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
- OUT_RING(ring, zsa->rb_stencilrefmask_bf |
- A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
- }
-
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
- uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control;
- if (fp->writes_pos) {
- val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
- val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
- }
- if (fp->no_earlyz || fp->has_kill) {
- val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
- }
- if (!ctx->rasterizer->depth_clip_near) {
- val |= A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE;
- }
- OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
- OUT_RING(ring, val);
- }
-
- if (dirty & FD_DIRTY_RASTERIZER) {
- struct fd3_rasterizer_stateobj *rasterizer =
- fd3_rasterizer_stateobj(ctx->rasterizer);
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
- OUT_RING(ring, rasterizer->gras_su_mode_control);
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
- OUT_RING(ring, rasterizer->gras_su_point_minmax);
- OUT_RING(ring, rasterizer->gras_su_point_size);
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
- OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
- OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
- }
-
- if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
- uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
- ->gras_cl_clip_cntl;
- uint8_t planes = ctx->rasterizer->clip_plane_enable;
- val |= CONDREG(ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL),
- A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);
- val |= CONDREG(ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL),
- A3XX_GRAS_CL_CLIP_CNTL_IJ_NON_PERSP_CENTER);
- val |= CONDREG(ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID),
- A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTROID);
- val |= CONDREG(ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID),
- A3XX_GRAS_CL_CLIP_CNTL_IJ_NON_PERSP_CENTROID);
- /* docs say enable at least one of IJ_PERSP_CENTER/CENTROID when fragcoord is used */
- val |= CONDREG(ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD),
- A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);
- val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
- val |= COND(fp->fragcoord_compmask != 0, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
- A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
- if (!emit->key.key.ucp_enables)
- val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
- MIN2(util_bitcount(planes), 6));
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
- OUT_RING(ring, val);
- }
-
- if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_UCP)) {
- uint32_t planes = ctx->rasterizer->clip_plane_enable;
- int count = 0;
-
- if (emit->key.key.ucp_enables)
- planes = 0;
-
- while (planes && count < 6) {
- int i = ffs(planes) - 1;
-
- planes &= ~(1U << i);
- fd_wfi(ctx->batch, ring);
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4);
- OUT_RING(ring, fui(ctx->ucp.ucp[i][0]));
- OUT_RING(ring, fui(ctx->ucp.ucp[i][1]));
- OUT_RING(ring, fui(ctx->ucp.ucp[i][2]));
- OUT_RING(ring, fui(ctx->ucp.ucp[i][3]));
- }
- }
-
- /* NOTE: since primitive_restart is not actually part of any
- * state object, we need to make sure that we always emit
- * PRIM_VTX_CNTL.. either that or be more clever and detect
- * when it changes.
- */
- if (emit->info) {
- const struct pipe_draw_info *info = emit->info;
- uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
- ->pc_prim_vtx_cntl;
-
- if (!emit->binning_pass) {
- uint32_t stride_in_vpc = align(fp->total_in, 4) / 4;
- if (stride_in_vpc > 0)
- stride_in_vpc = MAX2(stride_in_vpc, 2);
- val |= A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc);
- }
-
- if (info->index_size && info->primitive_restart) {
- val |= A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
- }
-
- val |= COND(vp->writes_psize, A3XX_PC_PRIM_VTX_CNTL_PSIZE);
-
- OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
- OUT_RING(ring, val);
- }
-
- if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER | FD_DIRTY_VIEWPORT)) {
- struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
- int minx = scissor->minx;
- int miny = scissor->miny;
- int maxx = scissor->maxx;
- int maxy = scissor->maxy;
-
- /* Unfortunately there is no separate depth clip disable, only an all
- * or nothing deal. So when we disable clipping, we must handle the
- * viewport clip via scissors.
- */
- if (!ctx->rasterizer->depth_clip_near) {
- struct pipe_viewport_state *vp = &ctx->viewport;
- minx = MAX2(minx, (int)floorf(vp->translate[0] - fabsf(vp->scale[0])));
- miny = MAX2(miny, (int)floorf(vp->translate[1] - fabsf(vp->scale[1])));
- maxx = MIN2(maxx, (int)ceilf(vp->translate[0] + fabsf(vp->scale[0])));
- maxy = MIN2(maxy, (int)ceilf(vp->translate[1] + fabsf(vp->scale[1])));
- }
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
- OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(minx) |
- A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(miny));
- OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(maxx - 1) |
- A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(maxy - 1));
-
- ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, minx);
- ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, miny);
- ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, maxx);
- ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, maxy);
- }
-
- if (dirty & FD_DIRTY_VIEWPORT) {
- fd_wfi(ctx->batch, ring);
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0]));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(ctx->viewport.translate[1] - 0.5));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(ctx->viewport.scale[1]));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(ctx->viewport.translate[2]));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
- }
-
- if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
- float zmin, zmax;
- int depth = 24;
- if (ctx->batch->framebuffer.zsbuf) {
- depth = util_format_get_component_bits(
- pipe_surface_format(ctx->batch->framebuffer.zsbuf),
- UTIL_FORMAT_COLORSPACE_ZS, 0);
- }
- util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
- &zmin, &zmax);
-
- OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2);
- if (depth == 32) {
- OUT_RING(ring, (uint32_t)(zmin * 0xffffffff));
- OUT_RING(ring, (uint32_t)(zmax * 0xffffffff));
- } else if (depth == 16) {
- OUT_RING(ring, (uint32_t)(zmin * 0xffff));
- OUT_RING(ring, (uint32_t)(zmax * 0xffff));
- } else {
- OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
- OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
- }
- }
-
- if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) {
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- int nr_cbufs = pfb->nr_cbufs;
- if (fd3_blend_stateobj(ctx->blend)->rb_render_control &
- A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE)
- nr_cbufs++;
- fd3_program_emit(ring, emit, nr_cbufs, pfb->cbufs);
- }
-
- /* TODO we should not need this or fd_wfi() before emit_constants():
- */
- OUT_PKT3(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, HLSQ_FLUSH);
-
- if (!emit->skip_consts) {
- ir3_emit_vs_consts(vp, ring, ctx, emit->info, emit->indirect, emit->draw);
- if (!emit->binning_pass)
- ir3_emit_fs_consts(fp, ring, ctx);
- }
-
- if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
- struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
- uint32_t i;
-
- for (i = 0; i < ARRAY_SIZE(blend->rb_mrt); i++) {
- enum pipe_format format =
- pipe_surface_format(ctx->batch->framebuffer.cbufs[i]);
- const struct util_format_description *desc =
- util_format_description(format);
- bool is_float = util_format_is_float(format);
- bool is_int = util_format_is_pure_integer(format);
- bool has_alpha = util_format_has_alpha(format);
- uint32_t control = blend->rb_mrt[i].control;
-
- if (is_int) {
- control &= (A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK |
- A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK);
- control |= A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
- }
-
- if (format == PIPE_FORMAT_NONE)
- control &= ~A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
-
- if (!has_alpha) {
- control &= ~A3XX_RB_MRT_CONTROL_BLEND2;
- }
-
- if (format && util_format_get_component_bits(
- format, UTIL_FORMAT_COLORSPACE_RGB, 0) < 8) {
- const struct pipe_rt_blend_state *rt;
- if (ctx->blend->independent_blend_enable)
- rt = &ctx->blend->rt[i];
- else
- rt = &ctx->blend->rt[0];
-
- if (!util_format_colormask_full(desc, rt->colormask))
- control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
- }
-
- OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, control);
-
- OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
- OUT_RING(ring, blend->rb_mrt[i].blend_control |
- COND(!is_float, A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE));
- }
- }
-
- if (dirty & FD_DIRTY_BLEND_COLOR) {
- struct pipe_blend_color *bcolor = &ctx->blend_color;
- OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4);
- OUT_RING(ring, A3XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) |
- A3XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
- OUT_RING(ring, A3XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) |
- A3XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
- OUT_RING(ring, A3XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) |
- A3XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
- OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) |
- A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
- }
-
- if (dirty & FD_DIRTY_TEX)
- fd_wfi(ctx->batch, ring);
-
- if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX)
- emit_textures(ctx, ring, SB_VERT_TEX, &ctx->tex[PIPE_SHADER_VERTEX]);
-
- if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX)
- emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]);
+ const struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
+ const struct ir3_shader_variant *fp = fd3_emit_get_fp(emit);
+ const enum fd_dirty_3d_state dirty = emit->dirty;
+
+ emit_marker(ring, 5);
+
+ if (dirty & FD_DIRTY_SAMPLE_MASK) {
+ OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
+ A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
+ A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
+ }
+
+ if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG |
+ FD_DIRTY_BLEND_DUAL)) &&
+ !emit->binning_pass) {
+ uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control |
+ fd3_blend_stateobj(ctx->blend)->rb_render_control;
+
+ val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
+ val |= COND(fp->fragcoord_compmask != 0,
+ A3XX_RB_RENDER_CONTROL_COORD_MASK(fp->fragcoord_compmask));
+ val |= COND(ctx->rasterizer->rasterizer_discard,
+ A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
+
+ /* I suppose if we needed to (which I don't *think* we need
+ * to), we could emit this for binning pass too. But we
+ * would need to keep a different patch-list for binning
+ * vs render pass.
+ */
+
+ OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+ OUT_RINGP(ring, val, &ctx->batch->rbrc_patches);
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
+ struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
+ struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+ OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
+ OUT_RING(ring, zsa->rb_alpha_ref);
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, zsa->rb_stencil_control);
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, zsa->rb_stencilrefmask |
+ A3XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+ OUT_RING(ring, zsa->rb_stencilrefmask_bf |
+ A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+ uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control;
+ if (fp->writes_pos) {
+ val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
+ val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+ }
+ if (fp->no_earlyz || fp->has_kill) {
+ val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+ }
+ if (!ctx->rasterizer->depth_clip_near) {
+ val |= A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE;
+ }
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, val);
+ }
+
+ if (dirty & FD_DIRTY_RASTERIZER) {
+ struct fd3_rasterizer_stateobj *rasterizer =
+ fd3_rasterizer_stateobj(ctx->rasterizer);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, rasterizer->gras_su_mode_control);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
+ OUT_RING(ring, rasterizer->gras_su_point_minmax);
+ OUT_RING(ring, rasterizer->gras_su_point_size);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
+ }
+
+ if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+ uint32_t val =
+ fd3_rasterizer_stateobj(ctx->rasterizer)->gras_cl_clip_cntl;
+ uint8_t planes = ctx->rasterizer->clip_plane_enable;
+ val |= CONDREG(
+ ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL),
+ A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);
+ val |= CONDREG(
+ ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL),
+ A3XX_GRAS_CL_CLIP_CNTL_IJ_NON_PERSP_CENTER);
+ val |= CONDREG(
+ ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID),
+ A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTROID);
+ val |= CONDREG(
+ ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID),
+ A3XX_GRAS_CL_CLIP_CNTL_IJ_NON_PERSP_CENTROID);
+ /* docs say enable at least one of IJ_PERSP_CENTER/CENTROID when fragcoord
+ * is used */
+ val |= CONDREG(ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD),
+ A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);
+ val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
+ val |=
+ COND(fp->fragcoord_compmask != 0,
+ A3XX_GRAS_CL_CLIP_CNTL_ZCOORD | A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
+ if (!emit->key.key.ucp_enables)
+ val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
+ MIN2(util_bitcount(planes), 6));
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, val);
+ }
+
+ if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_UCP)) {
+ uint32_t planes = ctx->rasterizer->clip_plane_enable;
+ int count = 0;
+
+ if (emit->key.key.ucp_enables)
+ planes = 0;
+
+ while (planes && count < 6) {
+ int i = ffs(planes) - 1;
+
+ planes &= ~(1U << i);
+ fd_wfi(ctx->batch, ring);
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4);
+ OUT_RING(ring, fui(ctx->ucp.ucp[i][0]));
+ OUT_RING(ring, fui(ctx->ucp.ucp[i][1]));
+ OUT_RING(ring, fui(ctx->ucp.ucp[i][2]));
+ OUT_RING(ring, fui(ctx->ucp.ucp[i][3]));
+ }
+ }
+
+ /* NOTE: since primitive_restart is not actually part of any
+ * state object, we need to make sure that we always emit
+ * PRIM_VTX_CNTL.. either that or be more clever and detect
+ * when it changes.
+ */
+ if (emit->info) {
+ const struct pipe_draw_info *info = emit->info;
+ uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)->pc_prim_vtx_cntl;
+
+ if (!emit->binning_pass) {
+ uint32_t stride_in_vpc = align(fp->total_in, 4) / 4;
+ if (stride_in_vpc > 0)
+ stride_in_vpc = MAX2(stride_in_vpc, 2);
+ val |= A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc);
+ }
+
+ if (info->index_size && info->primitive_restart) {
+ val |= A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
+ }
+
+ val |= COND(vp->writes_psize, A3XX_PC_PRIM_VTX_CNTL_PSIZE);
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, val);
+ }
+
+ if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER | FD_DIRTY_VIEWPORT)) {
+ struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+ int minx = scissor->minx;
+ int miny = scissor->miny;
+ int maxx = scissor->maxx;
+ int maxy = scissor->maxy;
+
+ /* Unfortunately there is no separate depth clip disable, only an all
+ * or nothing deal. So when we disable clipping, we must handle the
+ * viewport clip via scissors.
+ */
+ if (!ctx->rasterizer->depth_clip_near) {
+ struct pipe_viewport_state *vp = &ctx->viewport;
+ minx = MAX2(minx, (int)floorf(vp->translate[0] - fabsf(vp->scale[0])));
+ miny = MAX2(miny, (int)floorf(vp->translate[1] - fabsf(vp->scale[1])));
+ maxx = MIN2(maxx, (int)ceilf(vp->translate[0] + fabsf(vp->scale[0])));
+ maxy = MIN2(maxy, (int)ceilf(vp->translate[1] + fabsf(vp->scale[1])));
+ }
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(minx) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(miny));
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(maxx - 1) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(maxy - 1));
+
+ ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, minx);
+ ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, miny);
+ ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, maxx);
+ ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, maxy);
+ }
+
+ if (dirty & FD_DIRTY_VIEWPORT) {
+ fd_wfi(ctx->batch, ring);
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
+ OUT_RING(ring,
+ A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0]));
+ OUT_RING(ring,
+ A3XX_GRAS_CL_VPORT_YOFFSET(ctx->viewport.translate[1] - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(ctx->viewport.scale[1]));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(ctx->viewport.translate[2]));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
+ }
+
+ if (dirty &
+ (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
+ float zmin, zmax;
+ int depth = 24;
+ if (ctx->batch->framebuffer.zsbuf) {
+ depth = util_format_get_component_bits(
+ pipe_surface_format(ctx->batch->framebuffer.zsbuf),
+ UTIL_FORMAT_COLORSPACE_ZS, 0);
+ }
+ util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
+ &zmin, &zmax);
+
+ OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2);
+ if (depth == 32) {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffffffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffffffff));
+ } else if (depth == 16) {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffff));
+ } else {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
+ }
+ }
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) {
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ int nr_cbufs = pfb->nr_cbufs;
+ if (fd3_blend_stateobj(ctx->blend)->rb_render_control &
+ A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE)
+ nr_cbufs++;
+ fd3_program_emit(ring, emit, nr_cbufs, pfb->cbufs);
+ }
+
+ /* TODO we should not need this or fd_wfi() before emit_constants():
+ */
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, HLSQ_FLUSH);
+
+ if (!emit->skip_consts) {
+ ir3_emit_vs_consts(vp, ring, ctx, emit->info, emit->indirect, emit->draw);
+ if (!emit->binning_pass)
+ ir3_emit_fs_consts(fp, ring, ctx);
+ }
+
+ if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
+ struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
+ uint32_t i;
+
+ for (i = 0; i < ARRAY_SIZE(blend->rb_mrt); i++) {
+ enum pipe_format format =
+ pipe_surface_format(ctx->batch->framebuffer.cbufs[i]);
+ const struct util_format_description *desc =
+ util_format_description(format);
+ bool is_float = util_format_is_float(format);
+ bool is_int = util_format_is_pure_integer(format);
+ bool has_alpha = util_format_has_alpha(format);
+ uint32_t control = blend->rb_mrt[i].control;
+
+ if (is_int) {
+ control &= (A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK |
+ A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK);
+ control |= A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
+ }
+
+ if (format == PIPE_FORMAT_NONE)
+ control &= ~A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+
+ if (!has_alpha) {
+ control &= ~A3XX_RB_MRT_CONTROL_BLEND2;
+ }
+
+ if (format && util_format_get_component_bits(
+ format, UTIL_FORMAT_COLORSPACE_RGB, 0) < 8) {
+ const struct pipe_rt_blend_state *rt;
+ if (ctx->blend->independent_blend_enable)
+ rt = &ctx->blend->rt[i];
+ else
+ rt = &ctx->blend->rt[0];
+
+ if (!util_format_colormask_full(desc, rt->colormask))
+ control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
+ }
+
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, control);
+
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
+ OUT_RING(ring,
+ blend->rb_mrt[i].blend_control |
+ COND(!is_float, A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE));
+ }
+ }
+
+ if (dirty & FD_DIRTY_BLEND_COLOR) {
+ struct pipe_blend_color *bcolor = &ctx->blend_color;
+ OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4);
+ OUT_RING(ring, A3XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) |
+ A3XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
+ OUT_RING(ring, A3XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) |
+ A3XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
+ OUT_RING(ring, A3XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) |
+ A3XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
+ OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) |
+ A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
+ }
+
+ if (dirty & FD_DIRTY_TEX)
+ fd_wfi(ctx->batch, ring);
+
+ if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX)
+ emit_textures(ctx, ring, SB_VERT_TEX, &ctx->tex[PIPE_SHADER_VERTEX]);
+
+ if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX)
+ emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]);
}
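
Nearly every register value in fd3_emit_state() above is assembled with the COND()/CONDREG() helpers. A minimal sketch of what they do, assuming they are paraphrased here from freedreno's utility headers (in the ir3 convention, r63.x marks a sysval that was never assigned a real register):

/* OR a bitfield into a register value only when the condition holds: */
#define COND(cond, val) ((cond) ? (val) : 0)

/* Same, keyed on whether a shader sysval was assigned a real register: */
#define VALIDREG(r)     ((r) != regid(63, 0))
#define CONDREG(r, val) COND(VALIDREG(r), (val))

This is why the barycentric IJ enables in GRAS_CL_CLIP_CNTL key directly off ir3_find_sysval_regid(): a sysval the shader does not use leaves its enable bit clear.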
/* emit setup at begin of new cmdstream buffer (don't rely on previous
 * state):
 */
void
fd3_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- struct fd_context *ctx = batch->ctx;
- struct fd3_context *fd3_ctx = fd3_context(ctx);
- int i;
-
- if (ctx->screen->gpu_id == 320) {
- OUT_PKT3(ring, CP_REG_RMW, 3);
- OUT_RING(ring, REG_A3XX_RBBM_CLOCK_CTL);
- OUT_RING(ring, 0xfffcffff);
- OUT_RING(ring, 0x00000000);
- }
-
- fd_wfi(batch, ring);
- OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
- OUT_RING(ring, 0x00007fff);
-
- OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_PARAM_REG, 3);
- OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_CTRL_REG */
- OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR_REG */
- OUT_RING(ring, 0x00000000); /* SP_VS_PVT_MEM_SIZE_REG */
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_PARAM_REG, 3);
- OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_CTRL_REG */
- OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR_REG */
- OUT_RING(ring, 0x00000000); /* SP_FS_PVT_MEM_SIZE_REG */
-
- OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
- OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
-
- OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 2);
- OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
- A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
- A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
- OUT_RING(ring, 0x00000000); /* RB_ALPHA_REF */
-
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
- OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
- A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_TSE_DEBUG_ECO, 1);
- OUT_RING(ring, 0x00000001); /* GRAS_TSE_DEBUG_ECO */
-
- OUT_PKT0(ring, REG_A3XX_TPL1_TP_VS_TEX_OFFSET, 1);
- OUT_RING(ring, A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(VERT_TEX_OFF) |
- A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(VERT_TEX_OFF) |
- A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * VERT_TEX_OFF));
-
- OUT_PKT0(ring, REG_A3XX_TPL1_TP_FS_TEX_OFFSET, 1);
- OUT_RING(ring, A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(FRAG_TEX_OFF) |
- A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(FRAG_TEX_OFF) |
- A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * FRAG_TEX_OFF));
-
- OUT_PKT0(ring, REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0, 2);
- OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_0 */
- OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_1 */
-
- OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E43, 1);
- OUT_RING(ring, 0x00000001); /* UNKNOWN_0E43 */
-
- OUT_PKT0(ring, REG_A3XX_UNKNOWN_0F03, 1);
- OUT_RING(ring, 0x00000001); /* UNKNOWN_0F03 */
-
- OUT_PKT0(ring, REG_A3XX_UNKNOWN_0EE0, 1);
- OUT_RING(ring, 0x00000003); /* UNKNOWN_0EE0 */
-
- OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1);
- OUT_RING(ring, 0x00000001); /* UNKNOWN_0C3D */
-
- OUT_PKT0(ring, REG_A3XX_HLSQ_PERFCOUNTER0_SELECT, 1);
- OUT_RING(ring, 0x00000000); /* HLSQ_PERFCOUNTER0_SELECT */
-
- OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2);
- OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) |
- A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(0));
- OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
- A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
-
- fd3_emit_cache_flush(batch, ring);
-
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
- OUT_RING(ring, 0xffc00010); /* GRAS_SU_POINT_MINMAX */
- OUT_RING(ring, 0x00000008); /* GRAS_SU_POINT_SIZE */
-
- OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
- OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
-
- OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
- OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
- A3XX_RB_WINDOW_OFFSET_Y(0));
-
- OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4);
- OUT_RING(ring, A3XX_RB_BLEND_RED_UINT(0) |
- A3XX_RB_BLEND_RED_FLOAT(0.0));
- OUT_RING(ring, A3XX_RB_BLEND_GREEN_UINT(0) |
- A3XX_RB_BLEND_GREEN_FLOAT(0.0));
- OUT_RING(ring, A3XX_RB_BLEND_BLUE_UINT(0) |
- A3XX_RB_BLEND_BLUE_FLOAT(0.0));
- OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
- A3XX_RB_BLEND_ALPHA_FLOAT(1.0));
-
- for (i = 0; i < 6; i++) {
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(i), 4);
- OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].X */
- OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Y */
- OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Z */
- OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */
- }
-
- OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
- OUT_RING(ring, 0x00000000);
-
- fd_event_write(batch, ring, CACHE_FLUSH);
-
- if (is_a3xx_p0(ctx->screen)) {
- OUT_PKT3(ring, CP_DRAW_INDX, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
- INDEX_SIZE_IGN, IGNORE_VISIBILITY, 0));
- OUT_RING(ring, 0); /* NumIndices */
- }
-
- OUT_PKT3(ring, CP_NOP, 4);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- fd_wfi(batch, ring);
-
- fd_hw_query_enable(batch, ring);
+ struct fd_context *ctx = batch->ctx;
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ int i;
+
+ if (ctx->screen->gpu_id == 320) {
+ OUT_PKT3(ring, CP_REG_RMW, 3);
+ OUT_RING(ring, REG_A3XX_RBBM_CLOCK_CTL);
+ OUT_RING(ring, 0xfffcffff);
+ OUT_RING(ring, 0x00000000);
+ }
+
+ fd_wfi(batch, ring);
+ OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+ OUT_RING(ring, 0x00007fff);
+
+ OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_PARAM_REG, 3);
+ OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_CTRL_REG */
+ OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0, 0); /* SP_VS_PVT_MEM_ADDR_REG */
+ OUT_RING(ring, 0x00000000); /* SP_VS_PVT_MEM_SIZE_REG */
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_PARAM_REG, 3);
+ OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_CTRL_REG */
+ OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0, 0); /* SP_FS_PVT_MEM_ADDR_REG */
+ OUT_RING(ring, 0x00000000); /* SP_FS_PVT_MEM_SIZE_REG */
+
+ OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
+ OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 2);
+ OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
+ A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
+ A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
+ OUT_RING(ring, 0x00000000); /* RB_ALPHA_REF */
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
+ OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
+ A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_TSE_DEBUG_ECO, 1);
+ OUT_RING(ring, 0x00000001); /* GRAS_TSE_DEBUG_ECO */
+
+ OUT_PKT0(ring, REG_A3XX_TPL1_TP_VS_TEX_OFFSET, 1);
+ OUT_RING(ring, A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(VERT_TEX_OFF) |
+ A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(VERT_TEX_OFF) |
+ A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ *
+ VERT_TEX_OFF));
+
+ OUT_PKT0(ring, REG_A3XX_TPL1_TP_FS_TEX_OFFSET, 1);
+ OUT_RING(ring, A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(FRAG_TEX_OFF) |
+ A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(FRAG_TEX_OFF) |
+ A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ *
+ FRAG_TEX_OFF));
+
+ OUT_PKT0(ring, REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0, 2);
+ OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_0 */
+ OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_1 */
+
+ OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E43, 1);
+ OUT_RING(ring, 0x00000001); /* UNKNOWN_0E43 */
+
+ OUT_PKT0(ring, REG_A3XX_UNKNOWN_0F03, 1);
+ OUT_RING(ring, 0x00000001); /* UNKNOWN_0F03 */
+
+ OUT_PKT0(ring, REG_A3XX_UNKNOWN_0EE0, 1);
+ OUT_RING(ring, 0x00000003); /* UNKNOWN_0EE0 */
+
+ OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1);
+ OUT_RING(ring, 0x00000001); /* UNKNOWN_0C3D */
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000); /* HLSQ_PERFCOUNTER0_SELECT */
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2);
+ OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) |
+ A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(0));
+ OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
+ A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
+
+ fd3_emit_cache_flush(batch, ring);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
+ OUT_RING(ring, 0xffc00010); /* GRAS_SU_POINT_MINMAX */
+ OUT_RING(ring, 0x00000008); /* GRAS_SU_POINT_SIZE */
+
+ OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
+
+ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | A3XX_RB_WINDOW_OFFSET_Y(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4);
+ OUT_RING(ring, A3XX_RB_BLEND_RED_UINT(0) | A3XX_RB_BLEND_RED_FLOAT(0.0));
+ OUT_RING(ring, A3XX_RB_BLEND_GREEN_UINT(0) | A3XX_RB_BLEND_GREEN_FLOAT(0.0));
+ OUT_RING(ring, A3XX_RB_BLEND_BLUE_UINT(0) | A3XX_RB_BLEND_BLUE_FLOAT(0.0));
+ OUT_RING(ring,
+ A3XX_RB_BLEND_ALPHA_UINT(0xff) | A3XX_RB_BLEND_ALPHA_FLOAT(1.0));
+
+ for (i = 0; i < 6; i++) {
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(i), 4);
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].X */
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Y */
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Z */
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */
+ }
+
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ fd_event_write(batch, ring, CACHE_FLUSH);
+
+ if (is_a3xx_p0(ctx->screen)) {
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, INDEX_SIZE_IGN,
+ IGNORE_VISIBILITY, 0));
+ OUT_RING(ring, 0); /* NumIndices */
+ }
+
+ OUT_PKT3(ring, CP_NOP, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ fd_wfi(batch, ring);
+
+ fd_hw_query_enable(batch, ring);
}
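
All of this static setup goes out as type-0 (register write) packets. A minimal sketch of the header that OUT_PKT0() emits, assuming it is simplified from the adreno pm4 ring macros (the real helper also reserves ring space):

/* A type-0 packet writes `cnt` consecutive dwords starting at register
 * `regindx`; each following OUT_RING() supplies one register value.
 */
static inline void
out_pkt0_sketch(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   OUT_RING(ring, CP_TYPE0_PKT | ((cnt - 1) << 16) | (regindx & 0x7fff));
}

So OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2) above is followed by exactly two payload dwords, one per consecutive register.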
void
fd3_emit_init_screen(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
- screen->emit_ib = fd3_emit_ib;
+ struct fd_screen *screen = fd_screen(pscreen);
+ screen->emit_ib = fd3_emit_ib;
}
#include "pipe/p_context.h"
-#include "freedreno_batch.h"
-#include "freedreno_context.h"
#include "fd3_format.h"
#include "fd3_program.h"
+#include "freedreno_batch.h"
+#include "freedreno_context.h"
#include "ir3_cache.h"
#include "ir3_gallium.h"
struct fd_ringbuffer;
void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
- struct pipe_surface **psurf, int bufs);
+ struct pipe_surface **psurf, int bufs);
/* grouped together emit-state for prog/vertex/state emit: */
struct fd3_emit {
- struct pipe_debug_callback *debug;
- const struct fd_vertex_state *vtx;
- const struct fd3_program_state *prog;
- const struct pipe_draw_info *info;
- const struct pipe_draw_indirect_info *indirect;
- const struct pipe_draw_start_count *draw;
- bool binning_pass;
- struct ir3_cache_key key;
- enum fd_dirty_3d_state dirty;
-
- uint32_t sprite_coord_enable;
- bool sprite_coord_mode;
- bool rasterflat;
- bool skip_consts;
-
- /* cached to avoid repeated lookups of same variants: */
- const struct ir3_shader_variant *vs, *fs;
+ struct pipe_debug_callback *debug;
+ const struct fd_vertex_state *vtx;
+ const struct fd3_program_state *prog;
+ const struct pipe_draw_info *info;
+ const struct pipe_draw_indirect_info *indirect;
+ const struct pipe_draw_start_count *draw;
+ bool binning_pass;
+ struct ir3_cache_key key;
+ enum fd_dirty_3d_state dirty;
+
+ uint32_t sprite_coord_enable;
+ bool sprite_coord_mode;
+ bool rasterflat;
+ bool skip_consts;
+
+ /* cached to avoid repeated lookups of same variants: */
+ const struct ir3_shader_variant *vs, *fs;
};
static inline const struct ir3_shader_variant *
fd3_emit_get_vp(struct fd3_emit *emit)
{
- if (!emit->vs) {
- emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs;
- }
- return emit->vs;
+ if (!emit->vs) {
+ emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs;
+ }
+ return emit->vs;
}
static inline const struct ir3_shader_variant *
fd3_emit_get_fp(struct fd3_emit *emit)
{
- if (!emit->fs) {
- if (emit->binning_pass) {
- /* use dummy stateobj to simplify binning vs non-binning: */
- static const struct ir3_shader_variant binning_fs = {};
- emit->fs = &binning_fs;
- } else {
- emit->fs = emit->prog->fs;
- }
- }
- return emit->fs;
+ if (!emit->fs) {
+ if (emit->binning_pass) {
+ /* use dummy stateobj to simplify binning vs non-binning: */
+ static const struct ir3_shader_variant binning_fs = {};
+ emit->fs = &binning_fs;
+ } else {
+ emit->fs = emit->prog->fs;
+ }
+ }
+ return emit->fs;
}
-void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) assert_dt;
+void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
+ struct fd3_emit *emit) assert_dt;
void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd3_emit *emit) assert_dt;
+ struct fd3_emit *emit) assert_dt;
-void fd3_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;
+void fd3_emit_restore(struct fd_batch *batch,
+ struct fd_ringbuffer *ring) assert_dt;
void fd3_emit_init_screen(struct pipe_screen *pscreen);
void fd3_emit_init(struct pipe_context *pctx);
static inline void
fd3_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
- __OUT_IB(ring, true, target);
+ __OUT_IB(ring, true, target);
}
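
fd3_emit_ib() passes true for __OUT_IB()'s prefetch argument. A hedged sketch of what that selects, assuming this paraphrases the shared freedreno ring macros (the reloc and size emission are elided):

/* Chain to the target ring with an indirect-buffer packet; the flag picks
 * the prefetch-enabled opcode variant over the prefetch-disabled one.
 */
OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
/* ...followed by the target ring's GPU address (via reloc) and its length
 * in dwords. */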
static inline void
-fd3_emit_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
- assert_dt
+fd3_emit_cache_flush(struct fd_batch *batch,
+ struct fd_ringbuffer *ring) assert_dt
{
- fd_wfi(batch, ring);
- OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
- OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
- OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
- A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
- A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
+ fd_wfi(batch, ring);
+ OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+ OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
+ OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
+ A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
+ A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
}
#endif /* FD3_EMIT_H */
struct fd3_format {
- enum a3xx_vtx_fmt vtx;
- enum a3xx_tex_fmt tex;
- enum a3xx_color_fmt rb;
- enum a3xx_color_swap swap;
- boolean present;
+ enum a3xx_vtx_fmt vtx;
+ enum a3xx_tex_fmt tex;
+ enum a3xx_color_fmt rb;
+ enum a3xx_color_swap swap;
+ boolean present;
};
/* vertex + texture */
-#define VT(pipe, fmt, rbfmt, swapfmt) \
- [PIPE_FORMAT_ ## pipe] = { \
- .present = 1, \
- .vtx = VFMT_ ## fmt, \
- .tex = TFMT_ ## fmt, \
- .rb = RB_ ## rbfmt, \
- .swap = swapfmt \
- }
+#define VT(pipe, fmt, rbfmt, swapfmt) \
+ [PIPE_FORMAT_##pipe] = {.present = 1, \
+ .vtx = VFMT_##fmt, \
+ .tex = TFMT_##fmt, \
+ .rb = RB_##rbfmt, \
+ .swap = swapfmt}
/* texture-only */
-#define _T(pipe, fmt, rbfmt, swapfmt) \
- [PIPE_FORMAT_ ## pipe] = { \
- .present = 1, \
- .vtx = VFMT_NONE, \
- .tex = TFMT_ ## fmt, \
- .rb = RB_ ## rbfmt, \
- .swap = swapfmt \
- }
+#define _T(pipe, fmt, rbfmt, swapfmt) \
+ [PIPE_FORMAT_##pipe] = {.present = 1, \
+ .vtx = VFMT_NONE, \
+ .tex = TFMT_##fmt, \
+ .rb = RB_##rbfmt, \
+ .swap = swapfmt}
/* vertex-only */
-#define V_(pipe, fmt, rbfmt, swapfmt) \
- [PIPE_FORMAT_ ## pipe] = { \
- .present = 1, \
- .vtx = VFMT_ ## fmt, \
- .tex = TFMT_NONE, \
- .rb = RB_ ## rbfmt, \
- .swap = swapfmt \
- }
+#define V_(pipe, fmt, rbfmt, swapfmt) \
+ [PIPE_FORMAT_##pipe] = {.present = 1, \
+ .vtx = VFMT_##fmt, \
+ .tex = TFMT_NONE, \
+ .rb = RB_##rbfmt, \
+ .swap = swapfmt}
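
For illustration, a few rows in the style of the real table (elided below); these specific entries are hypothetical examples of how each macro is used:

VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),  /* vtx + tex + rb */
_T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), /* no vertex fetch format */
V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX),          /* no texture (or rb) format */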
/* clang-format off */
static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
enum a3xx_vtx_fmt
fd3_pipe2vtx(enum pipe_format format)
{
- if (!formats[format].present)
- return VFMT_NONE;
- return formats[format].vtx;
+ if (!formats[format].present)
+ return VFMT_NONE;
+ return formats[format].vtx;
}
enum a3xx_tex_fmt
fd3_pipe2tex(enum pipe_format format)
{
- if (!formats[format].present)
- return TFMT_NONE;
- return formats[format].tex;
+ if (!formats[format].present)
+ return TFMT_NONE;
+ return formats[format].tex;
}
enum a3xx_color_fmt
fd3_pipe2color(enum pipe_format format)
{
- if (!formats[format].present)
- return RB_NONE;
- return formats[format].rb;
+ if (!formats[format].present)
+ return RB_NONE;
+ return formats[format].rb;
}
enum a3xx_color_swap
fd3_pipe2swap(enum pipe_format format)
{
- if (!formats[format].present)
- return WZYX;
- return formats[format].swap;
+ if (!formats[format].present)
+ return WZYX;
+ return formats[format].swap;
}
enum a3xx_color_fmt
fd3_fs_output_format(enum pipe_format format)
{
- if (util_format_is_srgb(format))
- return RB_R16G16B16A16_FLOAT;
- switch (format) {
- case PIPE_FORMAT_R16_FLOAT:
- case PIPE_FORMAT_R16G16_FLOAT:
- case PIPE_FORMAT_R11G11B10_FLOAT:
- return RB_R16G16B16A16_FLOAT;
- case PIPE_FORMAT_L8_UNORM:
- return RB_R8G8B8A8_UNORM;
- default:
- return fd3_pipe2color(format);
- }
+ if (util_format_is_srgb(format))
+ return RB_R16G16B16A16_FLOAT;
+ switch (format) {
+ case PIPE_FORMAT_R16_FLOAT:
+ case PIPE_FORMAT_R16G16_FLOAT:
+ case PIPE_FORMAT_R11G11B10_FLOAT:
+ return RB_R16G16B16A16_FLOAT;
+ case PIPE_FORMAT_L8_UNORM:
+ return RB_R8G8B8A8_UNORM;
+ default:
+ return fd3_pipe2color(format);
+ }
}
static inline enum a3xx_tex_swiz
tex_swiz(unsigned swiz)
{
- switch (swiz) {
- default:
- case PIPE_SWIZZLE_X: return A3XX_TEX_X;
- case PIPE_SWIZZLE_Y: return A3XX_TEX_Y;
- case PIPE_SWIZZLE_Z: return A3XX_TEX_Z;
- case PIPE_SWIZZLE_W: return A3XX_TEX_W;
- case PIPE_SWIZZLE_0: return A3XX_TEX_ZERO;
- case PIPE_SWIZZLE_1: return A3XX_TEX_ONE;
- }
+ switch (swiz) {
+ default:
+ case PIPE_SWIZZLE_X:
+ return A3XX_TEX_X;
+ case PIPE_SWIZZLE_Y:
+ return A3XX_TEX_Y;
+ case PIPE_SWIZZLE_Z:
+ return A3XX_TEX_Z;
+ case PIPE_SWIZZLE_W:
+ return A3XX_TEX_W;
+ case PIPE_SWIZZLE_0:
+ return A3XX_TEX_ZERO;
+ case PIPE_SWIZZLE_1:
+ return A3XX_TEX_ONE;
+ }
}
uint32_t
fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
- unsigned swizzle_b, unsigned swizzle_a)
+ unsigned swizzle_b, unsigned swizzle_a)
{
- const struct util_format_description *desc =
- util_format_description(format);
- unsigned char swiz[4] = {
- swizzle_r, swizzle_g, swizzle_b, swizzle_a,
- }, rswiz[4];
-
- util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
-
- return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
- A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
- A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
- A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
+ const struct util_format_description *desc = util_format_description(format);
+ unsigned char swiz[4] =
+ {
+ swizzle_r,
+ swizzle_g,
+ swizzle_b,
+ swizzle_a,
+ },
+ rswiz[4];
+
+ util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
+
+ return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
+ A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
+ A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
+ A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
}
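
util_format_compose_swizzles() resolves the caller's swizzle through the format's own channel ordering. A minimal sketch of its semantics, assuming this paraphrases the gallium u_format implementation:

/* dst[i] takes the user's selector, looked up through the format swizzle
 * when it names a channel (X..W); constant 0/1/NONE selectors pass through.
 * E.g. B8G8R8A8 has desc->swizzle = {Z,Y,X,W}, so a user swizzle of
 * {X,X,X,W} composes to {Z,Z,Z,W}.
 */
for (unsigned i = 0; i < 4; i++)
   dst[i] = (swz2[i] <= PIPE_SWIZZLE_W) ? swz1[swz2[i]] : swz2[i];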
enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
- unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+ unsigned swizzle_g, unsigned swizzle_b,
+ unsigned swizzle_a);
#endif /* FD3_FORMAT_H_ */
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "freedreno_draw.h"
-#include "freedreno_state.h"
#include "freedreno_resource.h"
+#include "freedreno_state.h"
-#include "fd3_gmem.h"
#include "fd3_context.h"
#include "fd3_emit.h"
-#include "fd3_program.h"
#include "fd3_format.h"
+#include "fd3_gmem.h"
+#include "fd3_program.h"
#include "fd3_zsa.h"
static void
-fd3_gmem_emit_set_prog(struct fd_context *ctx, struct fd3_emit *emit, struct fd_program_stateobj *prog)
+fd3_gmem_emit_set_prog(struct fd_context *ctx, struct fd3_emit *emit,
+ struct fd_program_stateobj *prog)
{
- emit->skip_consts = true;
- emit->key.vs = prog->vs;
- emit->key.fs = prog->fs;
- emit->prog = fd3_program_state(ir3_cache_lookup(ctx->shader_cache, &emit->key, &ctx->debug));
- /* reset the fd3_emit_get_*p cache */
- emit->vs = NULL;
- emit->fs = NULL;
+ emit->skip_consts = true;
+ emit->key.vs = prog->vs;
+ emit->key.fs = prog->fs;
+ emit->prog = fd3_program_state(
+ ir3_cache_lookup(ctx->shader_cache, &emit->key, &ctx->debug));
+ /* reset the fd3_emit_get_*p cache */
+ emit->vs = NULL;
+ emit->fs = NULL;
}
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
- struct pipe_surface **bufs, const uint32_t *bases, uint32_t bin_w,
- bool decode_srgb)
+ struct pipe_surface **bufs, const uint32_t *bases, uint32_t bin_w,
+ bool decode_srgb)
{
- enum a3xx_tile_mode tile_mode;
- unsigned i;
-
- for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {
- enum pipe_format pformat = 0;
- enum a3xx_color_fmt format = 0;
- enum a3xx_color_swap swap = WZYX;
- bool srgb = false;
- struct fd_resource *rsc = NULL;
- uint32_t stride = 0;
- uint32_t base = 0;
- uint32_t offset = 0;
-
- if (bin_w) {
- tile_mode = TILE_32X32;
- } else {
- tile_mode = LINEAR;
- }
-
- if ((i < nr_bufs) && bufs[i]) {
- struct pipe_surface *psurf = bufs[i];
-
- rsc = fd_resource(psurf->texture);
- pformat = psurf->format;
- /* In case we're drawing to Z32F_S8, the "color" actually goes to
- * the stencil
- */
- if (rsc->stencil) {
- rsc = rsc->stencil;
- pformat = rsc->b.b.format;
- if (bases)
- bases++;
- }
- format = fd3_pipe2color(pformat);
- if (decode_srgb)
- srgb = util_format_is_srgb(pformat);
- else
- pformat = util_format_linear(pformat);
-
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
-
- offset = fd_resource_offset(rsc, psurf->u.tex.level,
- psurf->u.tex.first_layer);
- swap = rsc->layout.tile_mode ? WZYX : fd3_pipe2swap(pformat);
-
- if (bin_w) {
- stride = bin_w << fdl_cpp_shift(&rsc->layout);
-
- if (bases) {
- base = bases[i];
- }
- } else {
- stride = fd_resource_pitch(rsc, psurf->u.tex.level);
- tile_mode = rsc->layout.tile_mode;
- }
- } else if (i < nr_bufs && bases) {
- base = bases[i];
- }
-
- OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
- OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
- A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
- A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
- A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
- COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB));
- if (bin_w || (i >= nr_bufs) || !bufs[i]) {
- OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
- } else {
- OUT_RELOC(ring, rsc->bo, offset, 0, -1);
- }
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
- OUT_RING(ring, COND((i < nr_bufs) && bufs[i],
- A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(
- fd3_fs_output_format(pformat))));
- }
+ enum a3xx_tile_mode tile_mode;
+ unsigned i;
+
+ for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {
+ enum pipe_format pformat = 0;
+ enum a3xx_color_fmt format = 0;
+ enum a3xx_color_swap swap = WZYX;
+ bool srgb = false;
+ struct fd_resource *rsc = NULL;
+ uint32_t stride = 0;
+ uint32_t base = 0;
+ uint32_t offset = 0;
+
+ if (bin_w) {
+ tile_mode = TILE_32X32;
+ } else {
+ tile_mode = LINEAR;
+ }
+
+ if ((i < nr_bufs) && bufs[i]) {
+ struct pipe_surface *psurf = bufs[i];
+
+ rsc = fd_resource(psurf->texture);
+ pformat = psurf->format;
+ /* In case we're drawing to Z32F_S8, the "color" actually goes to
+ * the stencil
+ */
+ if (rsc->stencil) {
+ rsc = rsc->stencil;
+ pformat = rsc->b.b.format;
+ if (bases)
+ bases++;
+ }
+ format = fd3_pipe2color(pformat);
+ if (decode_srgb)
+ srgb = util_format_is_srgb(pformat);
+ else
+ pformat = util_format_linear(pformat);
+
+ debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+ offset = fd_resource_offset(rsc, psurf->u.tex.level,
+ psurf->u.tex.first_layer);
+ swap = rsc->layout.tile_mode ? WZYX : fd3_pipe2swap(pformat);
+
+ if (bin_w) {
+ stride = bin_w << fdl_cpp_shift(&rsc->layout);
+
+ if (bases) {
+ base = bases[i];
+ }
+ } else {
+ stride = fd_resource_pitch(rsc, psurf->u.tex.level);
+ tile_mode = rsc->layout.tile_mode;
+ }
+ } else if (i < nr_bufs && bases) {
+ base = bases[i];
+ }
+
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
+ OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
+ A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
+ A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
+ A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
+ COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB));
+ if (bin_w || (i >= nr_bufs) || !bufs[i]) {
+ OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
+ } else {
+ OUT_RELOC(ring, rsc->bo, offset, 0, -1);
+ }
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
+ OUT_RING(ring, COND((i < nr_bufs) && bufs[i],
+ A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(
+ fd3_fs_output_format(pformat))));
+ }
}
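
A quick worked example for the binning-pass stride above, with hypothetical numbers: an RGBA8 surface has cpp = 4, so fdl_cpp_shift() returns 2, and a 32-pixel-wide bin gives

   stride = bin_w << fdl_cpp_shift(&rsc->layout) = 32 << 2 = 128 bytes,

while the sysmem (non-binning) path uses the resource's actual pitch and tile mode instead.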
static bool
use_hw_binning(struct fd_batch *batch)
{
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
-
- /* workaround: combining scissor optimization and hw binning
- * seems problematic. Seems like we end up with a mismatch
- * between binning pass and rendering pass, wrt. where the hw
- * thinks the vertices belong. And the blob driver doesn't
- * seem to implement anything like scissor optimization, so
- * not entirely sure what I might be missing.
- *
- * But scissor optimization is mainly for window managers,
- * which don't have many vertices (and therefore don't
- * benefit much from the binning pass).
- *
- * So for now just disable binning if scissor optimization is
- * used.
- */
- if (gmem->minx || gmem->miny)
- return false;
-
- if ((gmem->maxpw * gmem->maxph) > 32)
- return false;
-
- if ((gmem->maxpw > 15) || (gmem->maxph > 15))
- return false;
-
- return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+
+ /* workaround: combining scissor optimization and hw binning
+ * seems problematic. Seems like we end up with a mismatch
+ * between binning pass and rendering pass, wrt. where the hw
+ * thinks the vertices belong. And the blob driver doesn't
+ * seem to implement anything like scissor optimization, so
+ * not entirely sure what I might be missing.
+ *
+ * But scissor optimization is mainly for window managers,
+ * which don't have many vertices (and therefore don't
+ * benefit much from the binning pass).
+ *
+ * So for now just disable binning if scissor optimization is
+ * used.
+ */
+ if (gmem->minx || gmem->miny)
+ return false;
+
+ if ((gmem->maxpw * gmem->maxph) > 32)
+ return false;
+
+ if ((gmem->maxpw > 15) || (gmem->maxph > 15))
+ return false;
+
+ return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
}
/* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */
static void update_vsc_pipe(struct fd_batch *batch);
static void
-emit_binning_workaround(struct fd_batch *batch)
- assert_dt
+emit_binning_workaround(struct fd_batch *batch) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd_ringbuffer *ring = batch->gmem;
- struct fd3_emit emit = {
- .debug = &ctx->debug,
- .vtx = &ctx->solid_vbuf_state,
- .key = {
- .vs = ctx->solid_prog.vs,
- .fs = ctx->solid_prog.fs,
- },
- };
-
- fd3_gmem_emit_set_prog(ctx, &emit, &ctx->solid_prog);
-
- OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
- OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
- A3XX_RB_MODE_CONTROL_MRT(0));
- OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
- A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
- A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
-
- OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
- OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
- A3XX_RB_COPY_CONTROL_MODE(0) |
- A3XX_RB_COPY_CONTROL_GMEM_BASE(0));
- OUT_RELOC(ring, fd_resource(ctx->solid_vbuf)->bo, 0x20, 0, -1); /* RB_COPY_DEST_BASE */
- OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));
- OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
- A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |
- A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |
- A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
- A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
- A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
-
- fd3_program_emit(ring, &emit, 0, NULL);
- fd3_emit_vertex_bufs(ring, &emit);
-
- OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
- OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
- A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
- A3XX_HLSQ_CONTROL_0_REG_RESERVED2 |
- A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
- OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
- A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
- OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
- OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */
-
- OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
- OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) |
- A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20));
-
- OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
- A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
- A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
-
- OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
-
- OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
- A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
- A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
- OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0));
-
- OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
- OUT_RING(ring, 0); /* VFD_INDEX_MIN */
- OUT_RING(ring, 2); /* VFD_INDEX_MAX */
- OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
- OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
-
- OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
- OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
- A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
- A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
- A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
- OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
- A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1));
- OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) |
- A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
- OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
- A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
- OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) |
- A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0));
-
- fd_wfi(batch, ring);
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
- OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE |
- A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE |
- A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE |
- A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE |
- A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE);
-
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
- OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
- A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
-
- OUT_PKT3(ring, CP_DRAW_INDX_2, 5);
- OUT_RING(ring, 0x00000000); /* viz query info. */
- OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE,
- INDEX_SIZE_32_BIT, IGNORE_VISIBILITY, 0));
- OUT_RING(ring, 2); /* NumIndices */
- OUT_RING(ring, 2);
- OUT_RING(ring, 1);
- fd_reset_wfi(batch);
-
- OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);
- OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));
-
- OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
- OUT_RING(ring, 0x00000000);
-
- fd_wfi(batch, ring);
- OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
- OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
- A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
- OUT_RING(ring, 0x00000000);
+ struct fd_context *ctx = batch->ctx;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct fd3_emit emit = {
+ .debug = &ctx->debug,
+ .vtx = &ctx->solid_vbuf_state,
+ .key =
+ {
+ .vs = ctx->solid_prog.vs,
+ .fs = ctx->solid_prog.fs,
+ },
+ };
+
+ fd3_gmem_emit_set_prog(ctx, &emit, &ctx->solid_prog);
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(0));
+ OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
+ A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+
+ OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
+ OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
+ A3XX_RB_COPY_CONTROL_MODE(0) |
+ A3XX_RB_COPY_CONTROL_GMEM_BASE(0));
+ OUT_RELOC(ring, fd_resource(ctx->solid_vbuf)->bo, 0x20, 0,
+ -1); /* RB_COPY_DEST_BASE */
+ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));
+ OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
+ A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |
+ A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |
+ A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
+ A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+ fd3_program_emit(ring, &emit, 0, NULL);
+ fd3_emit_vertex_bufs(ring, &emit);
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+ A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
+ A3XX_HLSQ_CONTROL_0_REG_RESERVED2 |
+ A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
+ A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
+ OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
+ OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) |
+ A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
+ A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
+ A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
+
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, 0); /* VFD_INDEX_MIN */
+ OUT_RING(ring, 2); /* VFD_INDEX_MAX */
+ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring,
+ A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1));
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0));
+
+ fd_wfi(batch, ring);
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE |
+ A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE |
+ A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE |
+ A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE |
+ A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
+ OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
+ A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
+
+ OUT_PKT3(ring, CP_DRAW_INDX_2, 5);
+ OUT_RING(ring, 0x00000000); /* viz query info. */
+ OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE, INDEX_SIZE_32_BIT,
+ IGNORE_VISIBILITY, 0));
+ OUT_RING(ring, 2); /* NumIndices */
+ OUT_RING(ring, 2);
+ OUT_RING(ring, 1);
+ fd_reset_wfi(batch);
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000);
+
+ fd_wfi(batch, ring);
+ OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
+ OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
+ A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
}
/* transfer from gmem to system memory (ie. normal RAM) */
static void
emit_gmem2mem_surf(struct fd_batch *batch,
- enum adreno_rb_copy_control_mode mode,
- bool stencil,
- uint32_t base, struct pipe_surface *psurf)
+ enum adreno_rb_copy_control_mode mode, bool stencil,
+ uint32_t base, struct pipe_surface *psurf)
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct fd_resource *rsc = fd_resource(psurf->texture);
- enum pipe_format format = psurf->format;
-
- if (!rsc->valid)
- return;
-
- if (stencil) {
- rsc = rsc->stencil;
- format = rsc->b.b.format;
- }
-
- uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level,
- psurf->u.tex.first_layer);
- uint32_t pitch = fd_resource_pitch(rsc, psurf->u.tex.level);
-
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
-
- OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
- OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
- A3XX_RB_COPY_CONTROL_MODE(mode) |
- A3XX_RB_COPY_CONTROL_GMEM_BASE(base) |
- COND(format == PIPE_FORMAT_Z32_FLOAT ||
- format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
- A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE));
-
- OUT_RELOC(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */
- OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(pitch));
- OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(rsc->layout.tile_mode) |
- A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) |
- A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
- A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
- A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format)));
-
- fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
- DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+ enum pipe_format format = psurf->format;
+
+ if (!rsc->valid)
+ return;
+
+ if (stencil) {
+ rsc = rsc->stencil;
+ format = rsc->b.b.format;
+ }
+
+ uint32_t offset =
+ fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
+ uint32_t pitch = fd_resource_pitch(rsc, psurf->u.tex.level);
+
+ debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+ OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
+ OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
+ A3XX_RB_COPY_CONTROL_MODE(mode) |
+ A3XX_RB_COPY_CONTROL_GMEM_BASE(base) |
+ COND(format == PIPE_FORMAT_Z32_FLOAT ||
+ format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
+ A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE));
+
+ OUT_RELOC(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */
+ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(pitch));
+ OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(rsc->layout.tile_mode) |
+ A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) |
+ A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
+ A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
+ A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format)));
+
+ fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
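An aside on the destination addressing above: fd_resource_offset() locates the selected miplevel/layer within the BO and fd_resource_pitch() gives that level's row stride, so for a linear destination a resolved texel lands at a simple 2D offset. A minimal sketch (linear case only; tiled layouts, selected via RB_COPY_DEST_INFO_TILE, interleave differently):

   /* Byte offset of texel (x, y) in a linear destination, given the
    * level's base offset, row pitch in bytes, and bytes per pixel.
    */
   static inline uint32_t
   linear_texel_offset(uint32_t level_offset, uint32_t pitch,
                       uint32_t cpp, uint32_t x, uint32_t y)
   {
      return level_offset + y * pitch + x * cpp;
   }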
static void
-fd3_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
- assert_dt
+fd3_emit_tile_gmem2mem(struct fd_batch *batch,
+ const struct fd_tile *tile) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct fd_ringbuffer *ring = batch->gmem;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct fd3_emit emit = {
- .debug = &ctx->debug,
- .vtx = &ctx->solid_vbuf_state,
- .key = {
- .vs = ctx->solid_prog.vs,
- .fs = ctx->solid_prog.fs,
- }
- };
- int i;
-
- emit.prog = fd3_program_state(ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
-
- OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
-
- OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
- A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
- A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
-
- OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
- OUT_RING(ring, 0xff000000 |
- A3XX_RB_STENCILREFMASK_STENCILREF(0) |
- A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
- A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
- OUT_RING(ring, 0xff000000 |
- A3XX_RB_STENCILREFMASK_STENCILREF(0) |
- A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
- A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
- OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
-
- fd_wfi(batch, ring);
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
-
- OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
- A3XX_RB_MODE_CONTROL_MRT(0));
-
- OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
- A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
- A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
- A3XX_RB_RENDER_CONTROL_BIN_WIDTH(batch->gmem_state->bin_w));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
- A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
-
- OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
- OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
- A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
- A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
- A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
- OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
- A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
- OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
- A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
-
- OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
- OUT_RING(ring, 0); /* VFD_INDEX_MIN */
- OUT_RING(ring, 2); /* VFD_INDEX_MAX */
- OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
- OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
-
- fd3_program_emit(ring, &emit, 0, NULL);
- fd3_emit_vertex_bufs(ring, &emit);
-
- if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
- if (!rsc->stencil || batch->resolve & FD_BUFFER_DEPTH)
- emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, false,
- gmem->zsbuf_base[0], pfb->zsbuf);
- if (rsc->stencil && batch->resolve & FD_BUFFER_STENCIL)
- emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, true,
- gmem->zsbuf_base[1], pfb->zsbuf);
- }
-
- if (batch->resolve & FD_BUFFER_COLOR) {
- for (i = 0; i < pfb->nr_cbufs; i++) {
- if (!pfb->cbufs[i])
- continue;
- if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
- continue;
- emit_gmem2mem_surf(batch, RB_COPY_RESOLVE, false,
- gmem->cbuf_base[i], pfb->cbufs[i]);
- }
- }
-
- OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
- A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+ struct fd_context *ctx = batch->ctx;
+ struct fd_ringbuffer *ring = batch->gmem;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd3_emit emit = {.debug = &ctx->debug,
+ .vtx = &ctx->solid_vbuf_state,
+ .key = {
+ .vs = ctx->solid_prog.vs,
+ .fs = ctx->solid_prog.fs,
+ }};
+ int i;
+
+ emit.prog = fd3_program_state(
+ ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
+
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, 0xff000000 | A3XX_RB_STENCILREFMASK_STENCILREF(0) |
+ A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
+ A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, 0xff000000 | A3XX_RB_STENCILREFMASK_STENCILREF(0) |
+ A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
+ A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
+
+ fd_wfi(batch, ring);
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width / 2.0 - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width / 2.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height / 2.0 - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height / 2.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+ OUT_RING(ring,
+ A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+ A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(batch->gmem_state->bin_w));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring,
+ A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, 0); /* VFD_INDEX_MIN */
+ OUT_RING(ring, 2); /* VFD_INDEX_MAX */
+ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+
+ fd3_program_emit(ring, &emit, 0, NULL);
+ fd3_emit_vertex_bufs(ring, &emit);
+
+ if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ if (!rsc->stencil || batch->resolve & FD_BUFFER_DEPTH)
+ emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, false,
+ gmem->zsbuf_base[0], pfb->zsbuf);
+ if (rsc->stencil && batch->resolve & FD_BUFFER_STENCIL)
+ emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, true,
+ gmem->zsbuf_base[1], pfb->zsbuf);
+ }
+
+ if (batch->resolve & FD_BUFFER_COLOR) {
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ if (!pfb->cbufs[i])
+ continue;
+ if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+ emit_gmem2mem_surf(batch, RB_COPY_RESOLVE, false, gmem->cbuf_base[i],
+ pfb->cbufs[i]);
+ }
+ }
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}
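The viewport registers in this resolve pass encode the usual NDC-to-window mapping: with scale w/2 and offset w/2 - 0.5, x_ndc in [-1, 1] maps to [-0.5, w - 0.5] (integer pixel centers), and the negative Y scale flips gallium's bottom-up NDC into the hardware's top-down window space. A one-line sketch of the transform the hardware applies:

   /* Window coordinate from NDC, per the VPORT scale/offset pairs
    * above.  E.g. w = 640: ndc -1 -> -0.5, ndc +1 -> 639.5.
    */
   static inline float
   ndc_to_window(float ndc, float scale, float offset)
   {
      return ndc * scale + offset;
   }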
/* transfer from system memory to gmem */
static void
emit_mem2gmem_surf(struct fd_batch *batch, const uint32_t bases[],
- struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w)
+ struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w)
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_surface *zsbufs[2];
-
- assert(bufs > 0);
-
- OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
- A3XX_RB_MODE_CONTROL_MRT(bufs - 1));
-
- emit_mrt(ring, bufs, psurf, bases, bin_w, false);
-
- if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT ||
- psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
- /* Depth is stored as unorm in gmem, so we have to write it in using a
- * special blit shader which writes depth.
- */
- OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
- OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z |
- A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
- A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
- A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE |
- A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)));
-
- OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
- OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) |
- A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32));
- OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * batch->gmem_state->bin_w));
-
- if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) {
- OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1);
- OUT_RING(ring, 0);
- } else {
- /* The gmem_restore_tex logic will put the first buffer's stencil
- * as color. Supply it with the proper information to make that
- * happen.
- */
- zsbufs[0] = zsbufs[1] = psurf[0];
- psurf = zsbufs;
- bufs = 2;
- }
- } else {
- OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
- OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
- }
-
- fd3_emit_gmem_restore_tex(ring, psurf, bufs);
-
- fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
- DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_surface *zsbufs[2];
+
+ assert(bufs > 0);
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(bufs - 1));
+
+ emit_mrt(ring, bufs, psurf, bases, bin_w, false);
+
+ if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT ||
+ psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
+ /* Depth is stored as unorm in gmem, so we have to write it in using a
+ * special blit shader which writes depth.
+ */
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z |
+ A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
+ A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
+ A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE |
+ A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)));
+
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
+ OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) |
+ A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32));
+ OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * batch->gmem_state->bin_w));
+
+ if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) {
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1);
+ OUT_RING(ring, 0);
+ } else {
+ /* The gmem_restore_tex logic will put the first buffer's stencil
+ * as color. Supply it with the proper information to make that
+ * happen.
+ */
+ zsbufs[0] = zsbufs[1] = psurf[0];
+ psurf = zsbufs;
+ bufs = 2;
+ }
+ } else {
+ OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
+ OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
+ }
+
+ fd3_emit_gmem_restore_tex(ring, psurf, bufs);
+
+ fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
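As the comment in emit_mem2gmem_surf() notes, gmem holds depth as unorm, so restoring a Z32_FLOAT surface cannot go through the plain color path; the blit shader re-emits depth (RB_DEPTH_CONTROL_FRAG_WRITES_Z) and quantization happens on the way in. Illustrative only -- the exact bit width is the hardware's internal depth format, which this sketch does not pin down:

   /* Float depth quantized to an N-bit unorm value (illustrative). */
   static inline uint32_t
   depth_to_unorm(float z, unsigned bits)
   {
      return (uint32_t)(z * (float)((1ull << bits) - 1) + 0.5f);
   }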
static void
-fd3_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
- assert_dt
+fd3_emit_tile_mem2gmem(struct fd_batch *batch,
+ const struct fd_tile *tile) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct fd3_emit emit = {
- .debug = &ctx->debug,
- .vtx = &ctx->blit_vbuf_state,
- .sprite_coord_enable = 1,
- };
- /* NOTE: They all use the same VP, this is for vtx bufs. */
- fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);
-
- float x0, y0, x1, y1;
- unsigned bin_w = tile->bin_w;
- unsigned bin_h = tile->bin_h;
- unsigned i;
-
- /* write texture coordinates to vertexbuf: */
- x0 = ((float)tile->xoff) / ((float)pfb->width);
- x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
- y0 = ((float)tile->yoff) / ((float)pfb->height);
- y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
-
- OUT_PKT3(ring, CP_MEM_WRITE, 5);
- OUT_RELOC(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
- OUT_RING(ring, fui(x0));
- OUT_RING(ring, fui(y0));
- OUT_RING(ring, fui(x1));
- OUT_RING(ring, fui(y1));
-
- fd3_emit_cache_flush(batch, ring);
-
- for (i = 0; i < 4; i++) {
- OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
- A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
- A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
-
- OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
- OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
- A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
- A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
- A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
- A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
- A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
- }
-
- OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) |
- A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
-
- fd_wfi(batch, ring);
- OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
-
- OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
- OUT_RING(ring, 0);
- OUT_RING(ring, 0);
-
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
- OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */
-
- fd_wfi(batch, ring);
- OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h/2.0 - 0.5));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h/2.0));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
- OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
- OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
- A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
- OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
- A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
- OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
- A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
- OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
- A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
-
- OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
- OUT_RING(ring, 0x2 |
- A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
- A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
- A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
- A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
-
- OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
- OUT_RING(ring, 0); /* RB_STENCIL_INFO */
- OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
-
- OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
- OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) |
- A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
- A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
- A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
-
- OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
- OUT_RING(ring, 0); /* VFD_INDEX_MIN */
- OUT_RING(ring, 2); /* VFD_INDEX_MAX */
- OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
- OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
-
- fd3_emit_vertex_bufs(ring, &emit);
-
- /* for gmem pitch/base calculations, we need to use the non-
- * truncated tile sizes:
- */
- bin_w = gmem->bin_w;
- bin_h = gmem->bin_h;
-
- if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
- fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[pfb->nr_cbufs - 1]);
- fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
- emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
- }
-
- if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
- pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) {
- /* Non-float can use a regular color write. It's split over 8-bit
- * components, so half precision is always sufficient.
- */
- fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);
- } else {
- /* Float depth needs special blit shader that writes depth */
- if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT)
- fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_z);
- else
- fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_zs);
- }
- fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
- emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
- }
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
-
- OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
- A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
+ struct fd_context *ctx = batch->ctx;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd3_emit emit = {
+ .debug = &ctx->debug,
+ .vtx = &ctx->blit_vbuf_state,
+ .sprite_coord_enable = 1,
+ };
+ /* NOTE: They all use the same VP, this is for vtx bufs. */
+ fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);
+
+ float x0, y0, x1, y1;
+ unsigned bin_w = tile->bin_w;
+ unsigned bin_h = tile->bin_h;
+ unsigned i;
+
+ /* write texture coordinates to vertexbuf: */
+ x0 = ((float)tile->xoff) / ((float)pfb->width);
+ x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
+ y0 = ((float)tile->yoff) / ((float)pfb->height);
+ y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
+
+ OUT_PKT3(ring, CP_MEM_WRITE, 5);
+ OUT_RELOC(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
+ OUT_RING(ring, fui(x0));
+ OUT_RING(ring, fui(y0));
+ OUT_RING(ring, fui(x1));
+ OUT_RING(ring, fui(y1));
+
+ fd3_emit_cache_flush(batch, ring);
+
+ for (i = 0; i < 4; i++) {
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
+ A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
+ A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
+ OUT_RING(
+ ring,
+ A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
+ A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+ A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
+ }
+
+ OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) |
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
+
+ fd_wfi(batch, ring);
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
+
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring,
+ A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */
+
+ fd_wfi(batch, ring);
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w / 2.0 - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w / 2.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h / 2.0 - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h / 2.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, 0x2 | A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
+ A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
+ A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
+ OUT_RING(ring, 0); /* RB_STENCIL_INFO */
+ OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring,
+ A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, 0); /* VFD_INDEX_MIN */
+ OUT_RING(ring, 2); /* VFD_INDEX_MAX */
+ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+
+ fd3_emit_vertex_bufs(ring, &emit);
+
+ /* for gmem pitch/base calculations, we need to use the non-
+ * truncated tile sizes:
+ */
+ bin_w = gmem->bin_w;
+ bin_h = gmem->bin_h;
+
+ if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
+ fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[pfb->nr_cbufs - 1]);
+ fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
+ emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs,
+ bin_w);
+ }
+
+ if (fd_gmem_needs_restore(batch, tile,
+ FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+ pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) {
+ /* Non-float can use a regular color write. It's split over 8-bit
+ * components, so half precision is always sufficient.
+ */
+ fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);
+ } else {
+ /* Float depth needs special blit shader that writes depth */
+ if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT)
+ fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_z);
+ else
+ fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_zs);
+ }
+ fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
+ emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
+ }
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
}
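The CP_MEM_WRITE near the top of fd3_emit_tile_mem2gmem() fills blit_texcoord_vbuf with the tile's footprint in normalized texture space, so the restore blit samples exactly the region of the system-memory copy that this tile covers. A worked example of the same arithmetic:

   /* Tile at (64, 0), 32x32 bins, in a 256x256 framebuffer:
    * s spans [0.25, 0.375] and t spans [0.0, 0.125].
    */
   float x0 = (float)xoff / (float)fb_width;            /* 64/256 = 0.250 */
   float x1 = (float)(xoff + bin_w) / (float)fb_width;  /* 96/256 = 0.375 */
   float y0 = (float)yoff / (float)fb_height;           /*  0/256 = 0.000 */
   float y1 = (float)(yoff + bin_h) / (float)fb_height; /* 32/256 = 0.125 */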
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
- unsigned i;
- for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
- struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
- *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
- }
- util_dynarray_clear(&batch->draw_patches);
+ unsigned i;
+ for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
+ *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
+ }
+ util_dynarray_clear(&batch->draw_patches);
}
static void
patch_rbrc(struct fd_batch *batch, uint32_t val)
{
- unsigned i;
- for (i = 0; i < fd_patch_num_elements(&batch->rbrc_patches); i++) {
- struct fd_cs_patch *patch = fd_patch_element(&batch->rbrc_patches, i);
- *patch->cs = patch->val | val;
- }
- util_dynarray_clear(&batch->rbrc_patches);
+ unsigned i;
+ for (i = 0; i < fd_patch_num_elements(&batch->rbrc_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&batch->rbrc_patches, i);
+ *patch->cs = patch->val | val;
+ }
+ util_dynarray_clear(&batch->rbrc_patches);
}
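patch_draws() and patch_rbrc() implement the same deferred-patch trick: while draws are being recorded, the final visibility mode and render-control bits are unknown (they depend on whether the batch ends up in gmem or sysmem mode), so each packet's location and partial value are stashed, then OR'd with the late bits at flush time. The core mechanism, condensed (fd_cs_patch mirrors the fields used above):

   struct fd_cs_patch {
      uint32_t *cs;   /* location of the dword in the cmdstream */
      uint32_t val;   /* value recorded at draw time */
   };

   static void
   apply_patches(struct fd_cs_patch *patches, unsigned n, uint32_t late_bits)
   {
      for (unsigned i = 0; i < n; i++)
         *patches[i].cs = patches[i].val | late_bits;
   }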
/* for rendering directly to system memory: */
static void
-fd3_emit_sysmem_prep(struct fd_batch *batch)
- assert_dt
+fd3_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct fd_ringbuffer *ring = batch->gmem;
- uint32_t i, pitch = 0;
-
- for (i = 0; i < pfb->nr_cbufs; i++) {
- struct pipe_surface *psurf = pfb->cbufs[i];
- if (!psurf)
- continue;
- struct fd_resource *rsc = fd_resource(psurf->texture);
- pitch = fd_resource_pitch(rsc, psurf->u.tex.level) / rsc->layout.cpp;
- }
-
- fd3_emit_restore(batch, ring);
-
- OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
- OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
- A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
-
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
-
- /* setup scissor/offset for current tile: */
- OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
- OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
- A3XX_RB_WINDOW_OFFSET_Y(0));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
- OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
- A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
- OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
- A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
-
- OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
- A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
-
- patch_draws(batch, IGNORE_VISIBILITY);
- patch_rbrc(batch, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd_ringbuffer *ring = batch->gmem;
+ uint32_t i, pitch = 0;
+
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ struct pipe_surface *psurf = pfb->cbufs[i];
+ if (!psurf)
+ continue;
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+ pitch = fd_resource_pitch(rsc, psurf->u.tex.level) / rsc->layout.cpp;
+ }
+
+ fd3_emit_restore(batch, ring);
+
+ OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
+ OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
+ A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
+
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
+
+ /* setup scissor/offset for current tile: */
+ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | A3XX_RB_WINDOW_OFFSET_Y(0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
+
+ patch_draws(batch, IGNORE_VISIBILITY);
+ patch_rbrc(batch, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
}
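Note the unit conversion feeding patch_rbrc() here: fd_resource_pitch() returns bytes, while RB_RENDER_CONTROL_BIN_WIDTH wants pixels, hence the divide by cpp. For example:

   /* 1920-wide RGBA8 surface: 7680 bytes per row / 4 bytes per pixel. */
   uint32_t pitch_px = 7680 / 4;   /* = 1920 */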
static void
-update_vsc_pipe(struct fd_batch *batch)
- assert_dt
+update_vsc_pipe(struct fd_batch *batch) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd3_context *fd3_ctx = fd3_context(ctx);
- struct fd_ringbuffer *ring = batch->gmem;
- int i;
-
- OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
- OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
-
- for (i = 0; i < 8; i++) {
- const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
-
- if (!ctx->vsc_pipe_bo[i]) {
- ctx->vsc_pipe_bo[i] = fd_bo_new(ctx->dev, 0x40000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
- }
-
- OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3);
- OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) |
- A3XX_VSC_PIPE_CONFIG_Y(pipe->y) |
- A3XX_VSC_PIPE_CONFIG_W(pipe->w) |
- A3XX_VSC_PIPE_CONFIG_H(pipe->h));
- OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0, 0); /* VSC_PIPE[i].DATA_ADDRESS */
- OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) - 32); /* VSC_PIPE[i].DATA_LENGTH */
- }
+ struct fd_context *ctx = batch->ctx;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ struct fd_ringbuffer *ring = batch->gmem;
+ int i;
+
+ OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
+ OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
+
+ for (i = 0; i < 8; i++) {
+ const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
+
+ if (!ctx->vsc_pipe_bo[i]) {
+ ctx->vsc_pipe_bo[i] = fd_bo_new(
+ ctx->dev, 0x40000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
+ }
+
+ OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3);
+ OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) |
+ A3XX_VSC_PIPE_CONFIG_Y(pipe->y) |
+ A3XX_VSC_PIPE_CONFIG_W(pipe->w) |
+ A3XX_VSC_PIPE_CONFIG_H(pipe->h));
+ OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
+ 0); /* VSC_PIPE[i].DATA_ADDRESS */
+ OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
+ 32); /* VSC_PIPE[i].DATA_LENGTH */
+ }
}
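Each of the eight VSC pipes covers a w x h rectangle of bins anchored at (x, y); during the binning pass the GPU appends per-bin visibility records into that pipe's BO and reports the consumed sizes via vsc_size_mem. A conceptual sketch of the coverage relation, using the fd_vsc_pipe fields programmed above:

   static bool
   pipe_covers_bin(const struct fd_vsc_pipe *p, unsigned bx, unsigned by)
   {
      return bx >= p->x && bx < p->x + p->w &&
             by >= p->y && by < p->y + p->h;
   }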
static void
-emit_binning_pass(struct fd_batch *batch)
- assert_dt
+emit_binning_pass(struct fd_batch *batch) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct fd_ringbuffer *ring = batch->gmem;
- int i;
-
- uint32_t x1 = gmem->minx;
- uint32_t y1 = gmem->miny;
- uint32_t x2 = gmem->minx + gmem->width - 1;
- uint32_t y2 = gmem->miny + gmem->height - 1;
-
- if (ctx->screen->gpu_id == 320) {
- emit_binning_workaround(batch);
- fd_wfi(batch, ring);
- OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
- OUT_RING(ring, 0x00007fff);
- }
-
- OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
- OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE);
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
- A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
-
- OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
- OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
- A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
-
- OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
- A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
- A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
-
- /* setup scissor/offset for whole screen: */
- OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
- OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(x1) |
- A3XX_RB_WINDOW_OFFSET_Y(y1));
-
- OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE);
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
- OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
- A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
- OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
- A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
-
- OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
- A3XX_RB_MODE_CONTROL_MRT(0));
-
- for (i = 0; i < 4; i++) {
- OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) |
- A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
- A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0));
- }
-
- OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
- OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) |
- A3XX_PC_VSTREAM_CONTROL_N(0));
-
- /* emit IB to binning drawcmds: */
- fd3_emit_ib(ring, batch->binning);
- fd_reset_wfi(batch);
-
- fd_wfi(batch, ring);
-
- /* and then put stuff back the way it was: */
-
- OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
- OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE |
- A3XX_SP_SP_CTRL_REG_CONSTMODE(1) |
- A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
- A3XX_SP_SP_CTRL_REG_L0MODE(0));
-
- OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
-
- OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
- OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
- A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1));
- OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
- A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
- A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
-
- fd_event_write(batch, ring, CACHE_FLUSH);
- fd_wfi(batch, ring);
-
- if (ctx->screen->gpu_id == 320) {
- /* dummy-draw workaround: */
- OUT_PKT3(ring, CP_DRAW_INDX, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
- INDEX_SIZE_IGN, IGNORE_VISIBILITY, 0));
- OUT_RING(ring, 0); /* NumIndices */
- fd_reset_wfi(batch);
- }
-
- OUT_PKT3(ring, CP_NOP, 4);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- fd_wfi(batch, ring);
-
- if (ctx->screen->gpu_id == 320) {
- emit_binning_workaround(batch);
- }
+ struct fd_context *ctx = batch->ctx;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd_ringbuffer *ring = batch->gmem;
+ int i;
+
+ uint32_t x1 = gmem->minx;
+ uint32_t y1 = gmem->miny;
+ uint32_t x2 = gmem->minx + gmem->width - 1;
+ uint32_t y2 = gmem->miny + gmem->height - 1;
+
+ if (ctx->screen->gpu_id == 320) {
+ emit_binning_workaround(batch);
+ fd_wfi(batch, ring);
+ OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+ OUT_RING(ring, 0x00007fff);
+ }
+
+ OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
+ OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
+ OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
+ A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
+
+ OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
+ A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
+
+ /* setup scissor/offset for whole screen: */
+ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(x1) | A3XX_RB_WINDOW_OFFSET_Y(y1));
+
+ OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(0));
+
+ for (i = 0; i < 4; i++) {
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) |
+ A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
+ A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0));
+ }
+
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring,
+ A3XX_PC_VSTREAM_CONTROL_SIZE(1) | A3XX_PC_VSTREAM_CONTROL_N(0));
+
+ /* emit IB to binning drawcmds: */
+ fd3_emit_ib(ring, batch->binning);
+ fd_reset_wfi(batch);
+
+ fd_wfi(batch, ring);
+
+ /* and then put stuff back the way it was: */
+
+ OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
+ OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE |
+ A3XX_SP_SP_CTRL_REG_CONSTMODE(1) |
+ A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
+ A3XX_SP_SP_CTRL_REG_L0MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1));
+ OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
+
+ fd_event_write(batch, ring, CACHE_FLUSH);
+ fd_wfi(batch, ring);
+
+ if (ctx->screen->gpu_id == 320) {
+ /* dummy-draw workaround: */
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, INDEX_SIZE_IGN,
+ IGNORE_VISIBILITY, 0));
+ OUT_RING(ring, 0); /* NumIndices */
+ fd_reset_wfi(batch);
+ }
+
+ OUT_PKT3(ring, CP_NOP, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ fd_wfi(batch, ring);
+
+ if (ctx->screen->gpu_id == 320) {
+ emit_binning_workaround(batch);
+ }
}
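A small convention worth calling out, since it recurs in every scissor write in this file: bottom-right coordinates are inclusive, so a region of width w anchored at x1 programs BR_X = x1 + w - 1, exactly as in the x2/y2 computation at the top of emit_binning_pass(). Sketch:

   /* Inclusive BR corner for a w x h region at (x1, y1). */
   uint32_t x2 = x1 + w - 1;
   uint32_t y2 = y1 + h - 1;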
/* before first tile */
static void
-fd3_emit_tile_init(struct fd_batch *batch)
- assert_dt
+fd3_emit_tile_init(struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- uint32_t rb_render_control;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ uint32_t rb_render_control;
- fd3_emit_restore(batch, ring);
+ fd3_emit_restore(batch, ring);
- /* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated
- * at the right and bottom edge tiles
- */
- OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
- OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
- A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
+ /* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated
+ * at the right and bottom edge tiles
+ */
+ OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
+ OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
+ A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
- update_vsc_pipe(batch);
+ update_vsc_pipe(batch);
- fd_wfi(batch, ring);
- OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
- OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
- A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
+ fd_wfi(batch, ring);
+ OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
+ OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
+ A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
- if (use_hw_binning(batch)) {
- /* emit hw binning pass: */
- emit_binning_pass(batch);
+ if (use_hw_binning(batch)) {
+ /* emit hw binning pass: */
+ emit_binning_pass(batch);
- patch_draws(batch, USE_VISIBILITY);
- } else {
- patch_draws(batch, IGNORE_VISIBILITY);
- }
+ patch_draws(batch, USE_VISIBILITY);
+ } else {
+ patch_draws(batch, IGNORE_VISIBILITY);
+ }
- rb_render_control = A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
- A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w);
+ rb_render_control = A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w);
- patch_rbrc(batch, rb_render_control);
+ patch_rbrc(batch, rb_render_control);
}
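fd3_emit_tile_init() is where the late visibility decision lands: draws were recorded with the vismode bits clear, and only now, after the optional binning pass, do we know whether a visibility stream exists to cull against. The decision, condensed:

   enum pc_di_vis_cull_mode vismode =
      use_hw_binning(batch) ? USE_VISIBILITY : IGNORE_VISIBILITY;
   patch_draws(batch, vismode);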
/* before mem2gmem */
static void
fd3_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
- A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
}
/* before IB to rendering cmds: */
static void
-fd3_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
- assert_dt
+fd3_emit_tile_renderprep(struct fd_batch *batch,
+ const struct fd_tile *tile) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct fd3_context *fd3_ctx = fd3_context(ctx);
- struct fd_ringbuffer *ring = batch->gmem;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
-
- uint32_t x1 = tile->xoff;
- uint32_t y1 = tile->yoff;
- uint32_t x2 = tile->xoff + tile->bin_w - 1;
- uint32_t y2 = tile->yoff + tile->bin_h - 1;
-
- uint32_t reg;
-
- OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
- reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
- if (pfb->zsbuf) {
- reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
- }
- OUT_RING(ring, reg);
- if (pfb->zsbuf) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
- OUT_RING(ring, A3XX_RB_DEPTH_PITCH(gmem->bin_w <<
- fdl_cpp_shift(&rsc->layout)));
- if (rsc->stencil) {
- OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
- OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
- OUT_RING(ring, A3XX_RB_STENCIL_PITCH(gmem->bin_w <<
- fdl_cpp_shift(&rsc->stencil->layout)));
- }
- } else {
- OUT_RING(ring, 0x00000000);
- }
-
- if (use_hw_binning(batch)) {
- const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
- struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
-
- assert(pipe->w && pipe->h);
-
- fd_event_write(batch, ring, HLSQ_FLUSH);
- fd_wfi(batch, ring);
-
- OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
- OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
- A3XX_PC_VSTREAM_CONTROL_N(tile->n));
-
-
- OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
- OUT_RELOC(ring, pipe_bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
- OUT_RELOC(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */
- (tile->p * 4), 0, 0);
- } else {
- OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
- OUT_RING(ring, 0x00000000);
- }
-
- OUT_PKT3(ring, CP_SET_BIN, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
- OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
-
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true);
-
- /* setup scissor/offset for current tile: */
- OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
- OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) |
- A3XX_RB_WINDOW_OFFSET_Y(tile->yoff));
-
- OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
- OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
- A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
- OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
- A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
+ struct fd_context *ctx = batch->ctx;
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ struct fd_ringbuffer *ring = batch->gmem;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
+ uint32_t x1 = tile->xoff;
+ uint32_t y1 = tile->yoff;
+ uint32_t x2 = tile->xoff + tile->bin_w - 1;
+ uint32_t y2 = tile->yoff + tile->bin_h - 1;
+
+ uint32_t reg;
+
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
+ reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
+ if (pfb->zsbuf) {
+ reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
+ }
+ OUT_RING(ring, reg);
+ if (pfb->zsbuf) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ OUT_RING(ring,
+ A3XX_RB_DEPTH_PITCH(gmem->bin_w << fdl_cpp_shift(&rsc->layout)));
+ if (rsc->stencil) {
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
+ OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
+ OUT_RING(ring, A3XX_RB_STENCIL_PITCH(gmem->bin_w << fdl_cpp_shift(
+ &rsc->stencil->layout)));
+ }
+ } else {
+ OUT_RING(ring, 0x00000000);
+ }
+
+ if (use_hw_binning(batch)) {
+ const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
+ struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
+
+ assert(pipe->w && pipe->h);
+
+ fd_event_write(batch, ring, HLSQ_FLUSH);
+ fd_wfi(batch, ring);
+
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
+ A3XX_PC_VSTREAM_CONTROL_N(tile->n));
+
+ OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
+ OUT_RELOC(ring, pipe_bo, 0, 0,
+ 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
+ OUT_RELOC(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <-
+ VSC_SIZE_ADDRESS + (p * 4) */
+ (tile->p * 4), 0, 0);
+ } else {
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+ }
+
+ OUT_PKT3(ring, CP_SET_BIN, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
+ OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
+
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w,
+ true);
+
+ /* setup scissor/offset for current tile: */
+ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) |
+ A3XX_RB_WINDOW_OFFSET_Y(tile->yoff));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
}
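The hw-binning branch above wires each tile to its slice of the binning results: tile->p names the VSC pipe that binned it and tile->n the tile's slot within that pipe, so the visibility stream comes from the pipe's BO and its length from the shared size buffer at one dword per pipe. In other words:

   uint32_t stream_slots = pipe_w * pipe_h;           /* PC_VSTREAM_CONTROL_SIZE */
   uint32_t size_offset  = tile_p * sizeof(uint32_t); /* into vsc_size_mem */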
void
-fd3_gmem_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd3_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
-
- ctx->emit_sysmem_prep = fd3_emit_sysmem_prep;
- ctx->emit_tile_init = fd3_emit_tile_init;
- ctx->emit_tile_prep = fd3_emit_tile_prep;
- ctx->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem;
- ctx->emit_tile_renderprep = fd3_emit_tile_renderprep;
- ctx->emit_tile_gmem2mem = fd3_emit_tile_gmem2mem;
+ struct fd_context *ctx = fd_context(pctx);
+
+ ctx->emit_sysmem_prep = fd3_emit_sysmem_prep;
+ ctx->emit_tile_init = fd3_emit_tile_init;
+ ctx->emit_tile_prep = fd3_emit_tile_prep;
+ ctx->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem;
+ ctx->emit_tile_renderprep = fd3_emit_tile_renderprep;
+ ctx->emit_tile_gmem2mem = fd3_emit_tile_gmem2mem;
}
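For orientation, a condensed sketch of how the freedreno core drives the hooks installed above; the loop shape matches freedreno_gmem.c, but the tile accessors here are hypothetical stand-ins:

   static void
   render_tiles_sketch(struct fd_batch *batch)
   {
      struct fd_context *ctx = batch->ctx;

      ctx->emit_tile_init(batch);               /* incl. hw binning pass */

      for (unsigned i = 0; i < num_tiles(batch); i++) {
         const struct fd_tile *tile = get_tile(batch, i);

         ctx->emit_tile_prep(batch, tile);
         ctx->emit_tile_mem2gmem(batch, tile);  /* restore, if needed */
         ctx->emit_tile_renderprep(batch, tile);
         /* ... IB into the recorded draw cmdstream ... */
         ctx->emit_tile_gmem2mem(batch, tile);  /* resolve */
      }
   }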
#include "pipe/p_state.h"
-#include "util/u_string.h"
+#include "util/format/u_format.h"
+#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "util/format/u_format.h"
+#include "util/u_string.h"
#include "freedreno_program.h"
-#include "fd3_program.h"
#include "fd3_emit.h"
-#include "fd3_texture.h"
#include "fd3_format.h"
+#include "fd3_program.h"
+#include "fd3_texture.h"
bool
fd3_needs_manual_clipping(const struct ir3_shader *shader,
- const struct pipe_rasterizer_state *rast)
+ const struct pipe_rasterizer_state *rast)
{
- uint64_t outputs = ir3_shader_outputs(shader);
+ uint64_t outputs = ir3_shader_outputs(shader);
- return (!rast->depth_clip_near ||
- util_bitcount(rast->clip_plane_enable) > 6 ||
- outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) |
- (1ULL << VARYING_SLOT_CLIP_DIST0) |
- (1ULL << VARYING_SLOT_CLIP_DIST1)));
+ return (!rast->depth_clip_near ||
+ util_bitcount(rast->clip_plane_enable) > 6 ||
+ outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) |
+ (1ULL << VARYING_SLOT_CLIP_DIST0) |
+ (1ULL << VARYING_SLOT_CLIP_DIST1)));
}
-
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
- const struct ir3_info *si = &so->info;
- enum adreno_state_block sb;
- enum adreno_state_src src;
- uint32_t i, sz, *bin;
-
- if (so->type == MESA_SHADER_VERTEX) {
- sb = SB_VERT_SHADER;
- } else {
- sb = SB_FRAG_SHADER;
- }
-
- if (FD_DBG(DIRECT)) {
- sz = si->sizedwords;
- src = SS_DIRECT;
- bin = fd_bo_map(so->bo);
- } else {
- sz = 0;
- src = SS_INDIRECT;
- bin = NULL;
- }
-
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
- CP_LOAD_STATE_0_STATE_SRC(src) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
- if (bin) {
- OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
- } else {
- OUT_RELOC(ring, so->bo, 0,
- CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
- }
- for (i = 0; i < sz; i++) {
- OUT_RING(ring, bin[i]);
- }
+ const struct ir3_info *si = &so->info;
+ enum adreno_state_block sb;
+ enum adreno_state_src src;
+ uint32_t i, sz, *bin;
+
+ if (so->type == MESA_SHADER_VERTEX) {
+ sb = SB_VERT_SHADER;
+ } else {
+ sb = SB_FRAG_SHADER;
+ }
+
+ if (FD_DBG(DIRECT)) {
+ sz = si->sizedwords;
+ src = SS_DIRECT;
+ bin = fd_bo_map(so->bo);
+ } else {
+ sz = 0;
+ src = SS_INDIRECT;
+ bin = NULL;
+ }
+
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(src) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
+ if (bin) {
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
+ } else {
+ OUT_RELOC(ring, so->bo, 0, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
+ }
+ for (i = 0; i < sz; i++) {
+ OUT_RING(ring, bin[i]);
+ }
}
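emit_shader() sizes its CP_LOAD_STATE packet as two header dwords plus the optional inlined payload: under the FD_DBG(DIRECT) debug flag the binary is copied into the ring (SS_DIRECT), otherwise only a reloc to the shader BO is emitted (SS_INDIRECT) and the payload length stays zero. Condensed:

   uint32_t sz = direct ? so->info.sizedwords : 0;
   OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);   /* header + optional payload */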
void
-fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
- int nr, struct pipe_surface **bufs)
+fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, int nr,
+ struct pipe_surface **bufs)
{
- const struct ir3_shader_variant *vp, *fp;
- const struct ir3_info *vsi, *fsi;
- enum a3xx_instrbuffermode fpbuffer, vpbuffer;
- uint32_t fpbuffersz, vpbuffersz, fsoff;
- uint32_t pos_regid, posz_regid, psize_regid;
- uint32_t ij_regid[4], face_regid, coord_regid, zwcoord_regid;
- uint32_t color_regid[4] = {0};
- int constmode;
- int i, j;
-
- debug_assert(nr <= ARRAY_SIZE(color_regid));
-
- vp = fd3_emit_get_vp(emit);
- fp = fd3_emit_get_fp(emit);
-
- vsi = &vp->info;
- fsi = &fp->info;
-
- fpbuffer = BUFFER;
- vpbuffer = BUFFER;
- fpbuffersz = fp->instrlen;
- vpbuffersz = vp->instrlen;
-
- /*
- * Decide whether to use BUFFER or CACHE mode for VS and FS. It
- * appears like 256 is the hard limit, but when the combined size
- * exceeds 128 then blob will try to keep FS in BUFFER mode and
- * switch to CACHE for VS until VS is too large. The blob seems
- * to switch FS out of BUFFER mode at slightly under 128. But
- * a bit fuzzy on the decision tree, so use slightly conservative
- * limits.
- *
- * TODO check if these thresholds for BUFFER vs CACHE mode are the
- * same for all a3xx or whether we need to consider the gpuid
- */
-
- if ((fpbuffersz + vpbuffersz) > 128) {
- if (fpbuffersz < 112) {
- /* FP:BUFFER VP:CACHE */
- vpbuffer = CACHE;
- vpbuffersz = 256 - fpbuffersz;
- } else if (vpbuffersz < 112) {
- /* FP:CACHE VP:BUFFER */
- fpbuffer = CACHE;
- fpbuffersz = 256 - vpbuffersz;
- } else {
- /* FP:CACHE VP:CACHE */
- vpbuffer = fpbuffer = CACHE;
- vpbuffersz = fpbuffersz = 192;
- }
- }
-
- if (fpbuffer == BUFFER) {
- fsoff = 128 - fpbuffersz;
- } else {
- fsoff = 256 - fpbuffersz;
- }
-
- /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
- constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
-
- pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
- posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
- psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
- if (fp->color0_mrt) {
- color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
- ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
- } else {
- color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
- color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
- color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
- color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
- }
-
- face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE);
- coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD);
- zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2);
- ij_regid[0] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
- ij_regid[1] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
- ij_regid[2] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
- ij_regid[3] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
-
- /* adjust regids for alpha output formats. there is no alpha render
- * format, so it's just treated like red
- */
- for (i = 0; i < nr; i++)
- if (util_format_is_alpha(pipe_surface_format(bufs[i])))
- color_regid[i] += 3;
-
- /* we could probably divide this up into things that need to be
- * emitted if frag-prog is dirty vs if vert-prog is dirty..
- */
-
- OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
- OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
- A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
- A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
- /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
- * flush some caches? I think we only need to set those
- * bits if we have updated const or shader..
- */
- A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
- A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
- OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
- A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
- A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) |
- A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid));
- OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) |
- A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid));
- OUT_RING(ring,
- A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTERREGID(ij_regid[0]) |
- A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTERREGID(ij_regid[1]) |
- A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTROIDREGID(ij_regid[2]) |
- A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTROIDREGID(ij_regid[3]));
- OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
- A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
- A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
- OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
- A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
- A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));
-
- OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
- OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
- COND(emit->binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
- A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
- A3XX_SP_SP_CTRL_REG_L0MODE(0));
-
- OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
- OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
-
- OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
- OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
- A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) |
- COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
- A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
- A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
- A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
- A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
- A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
- OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
- A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
- A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen - 1, 0)));
- OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
- A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
- A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in));
-
- struct ir3_shader_linkage l = {0};
- ir3_link_shaders(&l, vp, fp, false);
-
- for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
- uint32_t reg = 0;
-
- OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);
-
- reg |= A3XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
- reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
- j++;
-
- reg |= A3XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
- reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
- j++;
-
- OUT_RING(ring, reg);
- }
-
- for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
- uint32_t reg = 0;
-
- OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);
-
- reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8);
- reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8);
- reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8);
- reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8);
-
- OUT_RING(ring, reg);
- }
-
- OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
- OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
- A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
- OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
-
- if (emit->binning_pass) {
- OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
- OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
- A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1);
- OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
- A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
- } else {
- OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
- OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
- OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
- A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) |
- COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
- A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
- A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
- A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP |
- A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
- A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
- COND(fp->need_pixlod, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
- A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
- OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
- A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->sysval_in) |
- A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen - 1, 0)) |
- A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
- OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
- MAX2(128, vp->constlen)) |
- A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
- OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
- }
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
- OUT_RING(ring,
- COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
- A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
- A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
- for (i = 0; i < 4; i++) {
- uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
- COND(color_regid[i] & HALF_REG_ID, A3XX_SP_FS_MRT_REG_HALF_PRECISION);
-
- if (i < nr) {
- enum pipe_format fmt = pipe_surface_format(bufs[i]);
- mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
- COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
- }
- OUT_RING(ring, mrt_reg);
- }
-
- if (emit->binning_pass) {
- OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
- OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
- A3XX_VPC_ATTR_LMSIZE(1) |
- COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
- OUT_RING(ring, 0x00000000);
- } else {
- uint32_t vinterp[4], flatshade[2], vpsrepl[4];
-
- memset(vinterp, 0, sizeof(vinterp));
- memset(flatshade, 0, sizeof(flatshade));
- memset(vpsrepl, 0, sizeof(vpsrepl));
-
- /* figure out VARYING_INTERP / FLAT_SHAD register values: */
- for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
- /* NOTE: varyings are packed, so if compmask is 0xb
- * then first, third, and fourth component occupy
- * three consecutive varying slots:
- */
- unsigned compmask = fp->inputs[j].compmask;
-
- uint32_t inloc = fp->inputs[j].inloc;
-
- if (fp->inputs[j].flat ||
- (fp->inputs[j].rasterflat && emit->rasterflat)) {
- uint32_t loc = inloc;
-
- for (i = 0; i < 4; i++) {
- if (compmask & (1 << i)) {
- vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
- flatshade[loc / 32] |= 1 << (loc % 32);
- loc++;
- }
- }
- }
-
- bool coord_mode = emit->sprite_coord_mode;
- if (ir3_point_sprite(fp, j, emit->sprite_coord_enable, &coord_mode)) {
- /* mask is two 2-bit fields, where:
- * '01' -> S
- * '10' -> T
- * '11' -> 1 - T (flip mode)
- */
- unsigned mask = coord_mode ? 0b1101 : 0b1001;
- uint32_t loc = inloc;
- if (compmask & 0x1) {
- vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x2) {
- vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x4) {
- /* .z <- 0.0f */
- vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x8) {
- /* .w <- 1.0f */
- vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
- loc++;
- }
- }
- }
-
- OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
- OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
- A3XX_VPC_ATTR_THRDASSIGN(1) |
- A3XX_VPC_ATTR_LMSIZE(1) |
- COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
- OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
- A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
-
- OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
- OUT_RING(ring, vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
- OUT_RING(ring, vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
- OUT_RING(ring, vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
- OUT_RING(ring, vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
-
- OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
- OUT_RING(ring, vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
- OUT_RING(ring, vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
- OUT_RING(ring, vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
- OUT_RING(ring, vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
- OUT_RING(ring, flatshade[0]); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
- OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
- }
-
- if (vpbuffer == BUFFER)
- emit_shader(ring, vp);
-
- OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
- OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
-
- if (!emit->binning_pass) {
- if (fpbuffer == BUFFER)
- emit_shader(ring, fp);
-
- OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
- OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
- }
+ const struct ir3_shader_variant *vp, *fp;
+ const struct ir3_info *vsi, *fsi;
+ enum a3xx_instrbuffermode fpbuffer, vpbuffer;
+ uint32_t fpbuffersz, vpbuffersz, fsoff;
+ uint32_t pos_regid, posz_regid, psize_regid;
+ uint32_t ij_regid[4], face_regid, coord_regid, zwcoord_regid;
+ uint32_t color_regid[4] = {0};
+ int constmode;
+ int i, j;
+
+ debug_assert(nr <= ARRAY_SIZE(color_regid));
+
+ vp = fd3_emit_get_vp(emit);
+ fp = fd3_emit_get_fp(emit);
+
+ vsi = &vp->info;
+ fsi = &fp->info;
+
+ fpbuffer = BUFFER;
+ vpbuffer = BUFFER;
+ fpbuffersz = fp->instrlen;
+ vpbuffersz = vp->instrlen;
+
+   /*
+    * Decide whether to use BUFFER or CACHE mode for VS and FS. It
+    * appears that 256 is the hard limit, but when the combined size
+    * exceeds 128 the blob will try to keep FS in BUFFER mode and
+    * switch to CACHE for VS until VS is too large. The blob seems
+    * to switch FS out of BUFFER mode at slightly under 128, but the
+    * decision tree is a bit fuzzy, so use slightly conservative
+    * limits.
+    *
+    * TODO check if these thresholds for BUFFER vs CACHE mode are the
+    * same for all a3xx or whether we need to consider the gpuid
+    */
+
+ if ((fpbuffersz + vpbuffersz) > 128) {
+ if (fpbuffersz < 112) {
+ /* FP:BUFFER VP:CACHE */
+ vpbuffer = CACHE;
+ vpbuffersz = 256 - fpbuffersz;
+ } else if (vpbuffersz < 112) {
+ /* FP:CACHE VP:BUFFER */
+ fpbuffer = CACHE;
+ fpbuffersz = 256 - vpbuffersz;
+ } else {
+ /* FP:CACHE VP:CACHE */
+ vpbuffer = fpbuffer = CACHE;
+ vpbuffersz = fpbuffersz = 192;
+ }
+ }
+
+ if (fpbuffer == BUFFER) {
+ fsoff = 128 - fpbuffersz;
+ } else {
+ fsoff = 256 - fpbuffersz;
+ }
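
The decision tree above is easy to exercise standalone; here is a sketch with the same 256 hard limit and 128/112 conservative thresholds, printing the resulting mode and buffer size for a few hypothetical VS/FS instruction lengths:

#include <stdio.h>

enum mode { MODE_BUFFER, MODE_CACHE };

/* Standalone sketch of the VS/FS instruction-buffer split above,
 * sizes in the same units as shader->instrlen.
 */
static void
split(unsigned vsz, unsigned fsz)
{
   enum mode vmode = MODE_BUFFER, fmode = MODE_BUFFER;

   if ((fsz + vsz) > 128) {
      if (fsz < 112) {
         vmode = MODE_CACHE;         /* FP:BUFFER VP:CACHE */
         vsz = 256 - fsz;
      } else if (vsz < 112) {
         fmode = MODE_CACHE;         /* FP:CACHE VP:BUFFER */
         fsz = 256 - vsz;
      } else {
         vmode = fmode = MODE_CACHE; /* FP:CACHE VP:CACHE */
         vsz = fsz = 192;
      }
   }

   printf("VS: %s/%u  FS: %s/%u\n",
          vmode == MODE_CACHE ? "CACHE" : "BUFFER", vsz,
          fmode == MODE_CACHE ? "CACHE" : "BUFFER", fsz);
}

int
main(void)
{
   split(40, 60);   /* fits: both BUFFER */
   split(100, 60);  /* FS stays BUFFER, VS spills to CACHE */
   split(150, 150); /* both CACHE, capped at 192 */
   return 0;
}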
+
+   /* seems like if vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
+ constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
+
+ pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
+ posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
+ psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
+ if (fp->color0_mrt) {
+ color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
+ ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
+ } else {
+ color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
+ color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
+ color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
+ color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
+ }
+
+ face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE);
+ coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD);
+ zwcoord_regid =
+ (coord_regid == regid(63, 0)) ? regid(63, 0) : (coord_regid + 2);
+ ij_regid[0] =
+ ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
+ ij_regid[1] =
+ ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
+ ij_regid[2] =
+ ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
+ ij_regid[3] =
+ ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
+
+ /* adjust regids for alpha output formats. there is no alpha render
+ * format, so it's just treated like red
+ */
+ for (i = 0; i < nr; i++)
+ if (util_format_is_alpha(pipe_surface_format(bufs[i])))
+ color_regid[i] += 3;
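
The color_regid[i] += 3 works because an ir3 register id encodes register number and component as (num << 2) | comp, so adding 3 to a .x regid selects the .w (alpha) component of the same vec4. A quick standalone check:

#include <assert.h>
#include <stdio.h>

/* ir3-style register id: (num << 2) | component */
#define REGID(num, comp) (((num) << 2) | (comp))

int
main(void)
{
   unsigned red = REGID(5, 0);     /* r5.x, where red is written */

   assert(red + 3 == REGID(5, 3)); /* r5.w, the alpha component */
   printf("ok\n");
   return 0;
}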
+
+ /* we could probably divide this up into things that need to be
+ * emitted if frag-prog is dirty vs if vert-prog is dirty..
+ */
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+ A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
+ A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
+ /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
+ * flush some caches? I think we only need to set those
+ * bits if we have updated const or shader..
+ */
+ A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
+ A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
+ A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
+ A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) |
+ A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid));
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) |
+ A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid));
+ OUT_RING(ring,
+ A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTERREGID(ij_regid[0]) |
+ A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTERREGID(ij_regid[1]) |
+ A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTROIDREGID(ij_regid[2]) |
+ A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTROIDREGID(ij_regid[3]));
+ OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
+ A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
+ A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
+ OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
+ A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
+ A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));
+
+ OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
+ OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
+ COND(emit->binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
+ A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
+ A3XX_SP_SP_CTRL_REG_L0MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
+ OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
+
+ OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
+ OUT_RING(ring,
+ A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
+ A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) |
+ COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
+ A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
+ A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
+ A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
+ A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
+ A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
+ OUT_RING(ring,
+ A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
+ A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
+ A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen - 1, 0)));
+ OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
+ A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
+ A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in));
+
+ struct ir3_shader_linkage l = {0};
+ ir3_link_shaders(&l, vp, fp, false);
+
+ for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
+ uint32_t reg = 0;
+
+ OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);
+
+ reg |= A3XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
+ reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
+ j++;
+
+ reg |= A3XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
+ reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
+ j++;
+
+ OUT_RING(ring, reg);
+ }
+
+ for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
+ uint32_t reg = 0;
+
+ OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);
+
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8);
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8);
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8);
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8);
+
+ OUT_RING(ring, reg);
+ }
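
For reference, the two loops above pack the linked varyings two per SP_VS_OUT register (A and B halves) and four locations per SP_VS_VPC_DST register, so e.g. a hypothetical l.cnt of 5 consumes three OUT registers and two VPC_DST registers:

#include <stdio.h>

int
main(void)
{
   unsigned cnt = 5; /* hypothetical number of linked varyings */

   /* two varyings per SP_VS_OUT reg (A/B), four locs per VPC_DST reg */
   printf("SP_VS_OUT regs:     %u\n", (cnt + 1) / 2); /* 3 */
   printf("SP_VS_VPC_DST regs: %u\n", (cnt + 3) / 4); /* 2 */
   return 0;
}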
+
+ OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
+ OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
+ A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
+ OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
+
+ if (emit->binning_pass) {
+ OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
+ OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+ A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1);
+ OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
+ A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
+ } else {
+ OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
+ OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
+ OUT_RING(ring,
+ A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+ A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) |
+ COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
+ A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
+ A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
+ A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP |
+ A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
+ A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
+ COND(fp->need_pixlod, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
+ A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
+ OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
+ A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->sysval_in) |
+ A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(
+ MAX2(fp->constlen - 1, 0)) |
+ A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
+ OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
+ MAX2(128, vp->constlen)) |
+ A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
+ OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
+ }
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
+ OUT_RING(ring, COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
+ A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
+ A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
+ for (i = 0; i < 4; i++) {
+ uint32_t mrt_reg =
+ A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
+ COND(color_regid[i] & HALF_REG_ID, A3XX_SP_FS_MRT_REG_HALF_PRECISION);
+
+ if (i < nr) {
+ enum pipe_format fmt = pipe_surface_format(bufs[i]);
+ mrt_reg |=
+ COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
+ COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
+ }
+ OUT_RING(ring, mrt_reg);
+ }
+
+ if (emit->binning_pass) {
+ OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
+ OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1) |
+ COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
+ OUT_RING(ring, 0x00000000);
+ } else {
+ uint32_t vinterp[4], flatshade[2], vpsrepl[4];
+
+ memset(vinterp, 0, sizeof(vinterp));
+ memset(flatshade, 0, sizeof(flatshade));
+ memset(vpsrepl, 0, sizeof(vpsrepl));
+
+ /* figure out VARYING_INTERP / FLAT_SHAD register values: */
+ for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count;) {
+         /* NOTE: varyings are packed, so if compmask is 0xb
+          * then the first, third, and fourth components occupy
+          * three consecutive varying slots:
+          */
+ unsigned compmask = fp->inputs[j].compmask;
+
+ uint32_t inloc = fp->inputs[j].inloc;
+
+ if (fp->inputs[j].flat ||
+ (fp->inputs[j].rasterflat && emit->rasterflat)) {
+ uint32_t loc = inloc;
+
+ for (i = 0; i < 4; i++) {
+ if (compmask & (1 << i)) {
+ vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
+ flatshade[loc / 32] |= 1 << (loc % 32);
+ loc++;
+ }
+ }
+ }
+
+ bool coord_mode = emit->sprite_coord_mode;
+ if (ir3_point_sprite(fp, j, emit->sprite_coord_enable, &coord_mode)) {
+ /* mask is two 2-bit fields, where:
+ * '01' -> S
+ * '10' -> T
+ * '11' -> 1 - T (flip mode)
+ */
+ unsigned mask = coord_mode ? 0b1101 : 0b1001;
+ uint32_t loc = inloc;
+ if (compmask & 0x1) {
+ vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x2) {
+ vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x4) {
+ /* .z <- 0.0f */
+ vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x8) {
+ /* .w <- 1.0f */
+ vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
+ loc++;
+ }
+ }
+ }
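
Each varying component occupies a 2-bit field in vinterp/vpsrepl (16 components per 32-bit word; flatshade uses 1 bit per component, 32 per word), so a packed location maps to word loc/16 at bit offset (loc%16)*2. A standalone sketch of that indexing with a hypothetical location, assuming FLAT = 1 for the a3xx interp-mode value:

#include <stdio.h>

#define FLAT 1 /* assumed a3xx interp-mode value for flat shading */

int
main(void)
{
   unsigned vinterp[4] = {0}, flatshade[2] = {0};
   unsigned loc = 18; /* hypothetical packed varying location */

   vinterp[loc / 16] |= FLAT << ((loc % 16) * 2); /* vinterp[1], bits 4-5 */
   flatshade[loc / 32] |= 1u << (loc % 32);       /* flatshade[0], bit 18 */

   printf("vinterp[1]=0x%08x flatshade[0]=0x%08x\n", vinterp[1], flatshade[0]);
   return 0;
}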
+
+ OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
+ OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
+ A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1) |
+ COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
+ OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
+ A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
+
+ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
+ OUT_RING(ring, vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
+ OUT_RING(ring, vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
+ OUT_RING(ring, vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
+ OUT_RING(ring, vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
+
+ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
+ OUT_RING(ring, vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
+ OUT_RING(ring, vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
+ OUT_RING(ring, vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
+ OUT_RING(ring, vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
+ OUT_RING(ring, flatshade[0]); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
+ OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
+ }
+
+ if (vpbuffer == BUFFER)
+ emit_shader(ring, vp);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
+
+ if (!emit->binning_pass) {
+ if (fpbuffer == BUFFER)
+ emit_shader(ring, fp);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
+ }
}
static struct ir3_program_state *
fd3_program_create(void *data, struct ir3_shader_variant *bs,
- struct ir3_shader_variant *vs,
- struct ir3_shader_variant *hs,
- struct ir3_shader_variant *ds,
- struct ir3_shader_variant *gs,
- struct ir3_shader_variant *fs,
- const struct ir3_shader_key *key)
- in_dt
+ struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
+ struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
+ struct ir3_shader_variant *fs,
+ const struct ir3_shader_key *key) in_dt
{
- struct fd_context *ctx = fd_context(data);
- struct fd3_program_state *state = CALLOC_STRUCT(fd3_program_state);
+ struct fd_context *ctx = fd_context(data);
+ struct fd3_program_state *state = CALLOC_STRUCT(fd3_program_state);
- tc_assert_driver_thread(ctx->tc);
+ tc_assert_driver_thread(ctx->tc);
- state->bs = bs;
- state->vs = vs;
- state->fs = fs;
+ state->bs = bs;
+ state->vs = vs;
+ state->fs = fs;
- return &state->base;
+ return &state->base;
}
static void
fd3_program_destroy(void *data, struct ir3_program_state *state)
{
- struct fd3_program_state *so = fd3_program_state(state);
- free(so);
+ struct fd3_program_state *so = fd3_program_state(state);
+ free(so);
}
static const struct ir3_cache_funcs cache_funcs = {
- .create_state = fd3_program_create,
- .destroy_state = fd3_program_destroy,
+ .create_state = fd3_program_create,
+ .destroy_state = fd3_program_destroy,
};
void
fd3_prog_init(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
- ir3_prog_init(pctx);
- fd_prog_init(pctx);
+ ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
+ ir3_prog_init(pctx);
+ fd_prog_init(pctx);
}
struct fd3_emit;
struct fd3_program_state {
- struct ir3_program_state base;
- struct ir3_shader_variant *bs; /* VS for when emit->binning */
- struct ir3_shader_variant *vs;
- struct ir3_shader_variant *fs; /* FS for when !emit->binning */
+ struct ir3_program_state base;
+ struct ir3_shader_variant *bs; /* VS for when emit->binning */
+ struct ir3_shader_variant *vs;
+ struct ir3_shader_variant *fs; /* FS for when !emit->binning */
};
static inline struct fd3_program_state *
fd3_program_state(struct ir3_program_state *state)
{
- return (struct fd3_program_state *)state;
+ return (struct fd3_program_state *)state;
}
-void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
- int nr, struct pipe_surface **bufs);
+void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, int nr,
+ struct pipe_surface **bufs);
void fd3_prog_init(struct pipe_context *pctx);
bool fd3_needs_manual_clipping(const struct ir3_shader *,
- const struct pipe_rasterizer_state *);
+ const struct pipe_rasterizer_state *);
#endif /* FD3_PROGRAM_H_ */
* Rob Clark <robclark@freedesktop.org>
*/
-#include "freedreno_query_hw.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
+#include "freedreno_query_hw.h"
#include "freedreno_util.h"
-#include "fd3_query.h"
#include "fd3_format.h"
-
+#include "fd3_query.h"
struct fd_rb_samp_ctrs {
- uint64_t ctr[16];
+ uint64_t ctr[16];
};
/*
static struct fd_hw_sample *
occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- struct fd_hw_sample *samp =
- fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
-
- /* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
- * HW_QUERY_BASE_REG register:
- */
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A3XX_RB_SAMPLE_COUNT_ADDR) | 0x80000000);
- OUT_RING(ring, HW_QUERY_BASE_REG);
- OUT_RING(ring, samp->offset);
-
- OUT_PKT0(ring, REG_A3XX_RB_SAMPLE_COUNT_CONTROL, 1);
- OUT_RING(ring, A3XX_RB_SAMPLE_COUNT_CONTROL_COPY);
-
- OUT_PKT3(ring, CP_DRAW_INDX, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, DRAW(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
- INDEX_SIZE_IGN, USE_VISIBILITY, 0));
- OUT_RING(ring, 0); /* NumIndices */
-
- fd_event_write(batch, ring, ZPASS_DONE);
-
- OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1);
- OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE);
-
- OUT_PKT0(ring, REG_A3XX_VBIF_PERF_CNT_EN, 1);
- OUT_RING(ring, A3XX_VBIF_PERF_CNT_EN_CNT0 |
- A3XX_VBIF_PERF_CNT_EN_CNT1 |
- A3XX_VBIF_PERF_CNT_EN_PWRCNT0 |
- A3XX_VBIF_PERF_CNT_EN_PWRCNT1 |
- A3XX_VBIF_PERF_CNT_EN_PWRCNT2);
-
- return samp;
+ struct fd_hw_sample *samp =
+ fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
+
+ /* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
+ * HW_QUERY_BASE_REG register:
+ */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A3XX_RB_SAMPLE_COUNT_ADDR) | 0x80000000);
+ OUT_RING(ring, HW_QUERY_BASE_REG);
+ OUT_RING(ring, samp->offset);
+
+ OUT_PKT0(ring, REG_A3XX_RB_SAMPLE_COUNT_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_SAMPLE_COUNT_CONTROL_COPY);
+
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
+ INDEX_SIZE_IGN, USE_VISIBILITY, 0));
+ OUT_RING(ring, 0); /* NumIndices */
+
+ fd_event_write(batch, ring, ZPASS_DONE);
+
+ OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1);
+ OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE);
+
+ OUT_PKT0(ring, REG_A3XX_VBIF_PERF_CNT_EN, 1);
+ OUT_RING(ring, A3XX_VBIF_PERF_CNT_EN_CNT0 | A3XX_VBIF_PERF_CNT_EN_CNT1 |
+ A3XX_VBIF_PERF_CNT_EN_PWRCNT0 |
+ A3XX_VBIF_PERF_CNT_EN_PWRCNT1 |
+ A3XX_VBIF_PERF_CNT_EN_PWRCNT2);
+
+ return samp;
}
static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start,
- const struct fd_rb_samp_ctrs *end)
+ const struct fd_rb_samp_ctrs *end)
{
- uint64_t n = 0;
- unsigned i;
+ uint64_t n = 0;
+ unsigned i;
- /* not quite sure what all of these are, possibly different
- * counters for each MRT render target:
- */
- for (i = 0; i < 16; i += 4)
- n += end->ctr[i] - start->ctr[i];
+   /* not quite sure what all of these are, possibly different
+    * counters for each render target (MRT):
+    */
+ for (i = 0; i < 16; i += 4)
+ n += end->ctr[i] - start->ctr[i];
- return n;
+ return n;
}
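
Note the stride-4 loop above: only ctr[0], ctr[4], ctr[8] and ctr[12] of the sixteen 64-bit counters are summed, with end minus start giving the samples passed between the two ZPASS_DONE snapshots. A standalone sketch with hypothetical counter values:

#include <stdint.h>
#include <stdio.h>

struct ctrs { uint64_t ctr[16]; };

static uint64_t
count(const struct ctrs *start, const struct ctrs *end)
{
   uint64_t n = 0;
   for (unsigned i = 0; i < 16; i += 4)
      n += end->ctr[i] - start->ctr[i];
   return n;
}

int
main(void)
{
   struct ctrs a = {{0}}, b = {{0}};
   b.ctr[0] = 10; /* hypothetical deltas between the two snapshots */
   b.ctr[4] = 2;
   printf("samples: %llu\n", (unsigned long long)count(&a, &b)); /* 12 */
   return 0;
}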
static void
-occlusion_counter_accumulate_result(struct fd_context *ctx,
- const void *start, const void *end,
- union pipe_query_result *result)
+occlusion_counter_accumulate_result(struct fd_context *ctx, const void *start,
+ const void *end,
+ union pipe_query_result *result)
{
- uint64_t n = count_samples(start, end);
- result->u64 += n;
+ uint64_t n = count_samples(start, end);
+ result->u64 += n;
}
static void
-occlusion_predicate_accumulate_result(struct fd_context *ctx,
- const void *start, const void *end,
- union pipe_query_result *result)
+occlusion_predicate_accumulate_result(struct fd_context *ctx, const void *start,
+ const void *end,
+ union pipe_query_result *result)
{
- uint64_t n = count_samples(start, end);
- result->b |= (n > 0);
+ uint64_t n = count_samples(start, end);
+ result->b |= (n > 0);
}
static const struct fd_hw_sample_provider occlusion_counter = {
- .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
- .get_sample = occlusion_get_sample,
- .accumulate_result = occlusion_counter_accumulate_result,
+ .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_counter_accumulate_result,
};
static const struct fd_hw_sample_provider occlusion_predicate = {
- .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
- .get_sample = occlusion_get_sample,
- .accumulate_result = occlusion_predicate_accumulate_result,
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_predicate_accumulate_result,
};
static const struct fd_hw_sample_provider occlusion_predicate_conservative = {
- .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
- .get_sample = occlusion_get_sample,
- .accumulate_result = occlusion_predicate_accumulate_result,
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_predicate_accumulate_result,
};
-void fd3_query_context_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+void
+fd3_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- ctx->create_query = fd_hw_create_query;
- ctx->query_prepare = fd_hw_query_prepare;
- ctx->query_prepare_tile = fd_hw_query_prepare_tile;
- ctx->query_update_batch = fd_hw_query_update_batch;
+ ctx->create_query = fd_hw_create_query;
+ ctx->query_prepare = fd_hw_query_prepare;
+ ctx->query_prepare_tile = fd_hw_query_prepare_tile;
+ ctx->query_update_batch = fd_hw_query_update_batch;
- fd_hw_query_register_provider(pctx, &occlusion_counter);
- fd_hw_query_register_provider(pctx, &occlusion_predicate);
- fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
+ fd_hw_query_register_provider(pctx, &occlusion_counter);
+ fd_hw_query_register_provider(pctx, &occlusion_predicate);
+ fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
}
* Rob Clark <robclark@freedesktop.org>
*/
-
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd3_rasterizer.h"
#include "fd3_context.h"
#include "fd3_format.h"
+#include "fd3_rasterizer.h"
void *
fd3_rasterizer_state_create(struct pipe_context *pctx,
- const struct pipe_rasterizer_state *cso)
+ const struct pipe_rasterizer_state *cso)
{
- struct fd3_rasterizer_stateobj *so;
- float psize_min, psize_max;
+ struct fd3_rasterizer_stateobj *so;
+ float psize_min, psize_max;
- so = CALLOC_STRUCT(fd3_rasterizer_stateobj);
- if (!so)
- return NULL;
+ so = CALLOC_STRUCT(fd3_rasterizer_stateobj);
+ if (!so)
+ return NULL;
- so->base = *cso;
+ so->base = *cso;
- if (cso->point_size_per_vertex) {
- psize_min = util_get_min_point_size(cso);
- psize_max = 4092;
- } else {
- /* Force the point size to be as if the vertex output was disabled. */
- psize_min = cso->point_size;
- psize_max = cso->point_size;
- }
+ if (cso->point_size_per_vertex) {
+ psize_min = util_get_min_point_size(cso);
+ psize_max = 4092;
+ } else {
+ /* Force the point size to be as if the vertex output was disabled. */
+ psize_min = cso->point_size;
+ psize_max = cso->point_size;
+ }
-/*
- if (cso->line_stipple_enable) {
- ??? TODO line stipple
- }
- TODO cso->half_pixel_center
- if (cso->multisample)
- TODO
-*/
- so->gras_cl_clip_cntl =
- COND(cso->clip_halfz, A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z);
- so->gras_su_point_minmax =
- A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
- A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
- so->gras_su_point_size = A3XX_GRAS_SU_POINT_SIZE(cso->point_size);
- so->gras_su_poly_offset_scale =
- A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
- so->gras_su_poly_offset_offset =
- A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
+ /*
+ if (cso->line_stipple_enable) {
+ ??? TODO line stipple
+ }
+ TODO cso->half_pixel_center
+ if (cso->multisample)
+ TODO
+ */
+ so->gras_cl_clip_cntl =
+ COND(cso->clip_halfz, A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z);
+ so->gras_su_point_minmax = A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
+ A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
+ so->gras_su_point_size = A3XX_GRAS_SU_POINT_SIZE(cso->point_size);
+ so->gras_su_poly_offset_scale =
+ A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
+ so->gras_su_poly_offset_offset =
+ A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
- so->gras_su_mode_control =
- A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
+ so->gras_su_mode_control =
+ A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width / 2.0);
- so->pc_prim_vtx_cntl =
- A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
- A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+ so->pc_prim_vtx_cntl = A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(
+ fd_polygon_mode(cso->fill_front)) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(
+ fd_polygon_mode(cso->fill_back));
- if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
- cso->fill_back != PIPE_POLYGON_MODE_FILL)
- so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE;
+ if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
+ cso->fill_back != PIPE_POLYGON_MODE_FILL)
+ so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE;
- if (cso->cull_face & PIPE_FACE_FRONT)
- so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
- if (cso->cull_face & PIPE_FACE_BACK)
- so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
- if (!cso->front_ccw)
- so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
- if (!cso->flatshade_first)
- so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
+ if (cso->cull_face & PIPE_FACE_FRONT)
+ so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
+ if (cso->cull_face & PIPE_FACE_BACK)
+ so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
+ if (!cso->front_ccw)
+ so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
+ if (!cso->flatshade_first)
+ so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
- if (cso->offset_tri)
- so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
- if (!cso->depth_clip_near)
- so->gras_cl_clip_cntl |= A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
+ if (cso->offset_tri)
+ so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
+ if (!cso->depth_clip_near)
+ so->gras_cl_clip_cntl |= A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
- return so;
+ return so;
}
#ifndef FD3_RASTERIZER_H_
#define FD3_RASTERIZER_H_
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
struct fd3_rasterizer_stateobj {
- struct pipe_rasterizer_state base;
- uint32_t gras_su_point_minmax;
- uint32_t gras_su_point_size;
- uint32_t gras_su_poly_offset_scale;
- uint32_t gras_su_poly_offset_offset;
+ struct pipe_rasterizer_state base;
+ uint32_t gras_su_point_minmax;
+ uint32_t gras_su_point_size;
+ uint32_t gras_su_poly_offset_scale;
+ uint32_t gras_su_poly_offset_offset;
- uint32_t gras_su_mode_control;
- uint32_t gras_cl_clip_cntl;
- uint32_t pc_prim_vtx_cntl;
+ uint32_t gras_su_mode_control;
+ uint32_t gras_cl_clip_cntl;
+ uint32_t pc_prim_vtx_cntl;
};
static inline struct fd3_rasterizer_stateobj *
fd3_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
- return (struct fd3_rasterizer_stateobj *)rast;
+ return (struct fd3_rasterizer_stateobj *)rast;
}
-void * fd3_rasterizer_state_create(struct pipe_context *pctx,
- const struct pipe_rasterizer_state *cso);
+void *fd3_rasterizer_state_create(struct pipe_context *pctx,
+ const struct pipe_rasterizer_state *cso);
#endif /* FD3_RASTERIZER_H_ */
#include "fd3_format.h"
static uint32_t
-setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
+setup_slices(struct fd_resource *rsc, uint32_t alignment,
+ enum pipe_format format)
{
- struct pipe_resource *prsc = &rsc->b.b;
- uint32_t level, size = 0;
- uint32_t width0 = prsc->width0;
-
- if (rsc->layout.tile_mode && prsc->target != PIPE_TEXTURE_CUBE)
- width0 = util_next_power_of_two(width0);
-
- /* 32 pixel alignment */
- fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
-
- for (level = 0; level <= prsc->last_level; level++) {
- struct fdl_slice *slice = fd_resource_slice(rsc, level);
- uint32_t pitch = fdl_pitch(&rsc->layout, level);
- uint32_t height = u_minify(prsc->height0, level);
- if (rsc->layout.tile_mode) {
- height = align(height, 4);
- if (prsc->target != PIPE_TEXTURE_CUBE)
- height = util_next_power_of_two(height);
- }
-
- uint32_t nblocksy = util_format_get_nblocksy(format, height);
-
- slice->offset = size;
- /* 1d array and 2d array textures must all have the same layer size
- * for each miplevel on a3xx. 3d textures can have different layer
- * sizes for high levels, but the hw auto-sizer is buggy (or at least
- * different than what this code does), so as soon as the layer size
- * range gets into range, we stop reducing it.
- */
- if (prsc->target == PIPE_TEXTURE_3D && (
- level == 1 ||
- (level > 1 && fd_resource_slice(rsc, level - 1)->size0 > 0xf000)))
- slice->size0 = align(nblocksy * pitch, alignment);
- else if (level == 0 || alignment == 1)
- slice->size0 = align(nblocksy * pitch, alignment);
- else
- slice->size0 = fd_resource_slice(rsc, level - 1)->size0;
-
- size += slice->size0 * u_minify(prsc->depth0, level) * prsc->array_size;
- }
-
- return size;
+ struct pipe_resource *prsc = &rsc->b.b;
+ uint32_t level, size = 0;
+ uint32_t width0 = prsc->width0;
+
+ if (rsc->layout.tile_mode && prsc->target != PIPE_TEXTURE_CUBE)
+ width0 = util_next_power_of_two(width0);
+
+ /* 32 pixel alignment */
+ fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
+
+ for (level = 0; level <= prsc->last_level; level++) {
+ struct fdl_slice *slice = fd_resource_slice(rsc, level);
+ uint32_t pitch = fdl_pitch(&rsc->layout, level);
+ uint32_t height = u_minify(prsc->height0, level);
+ if (rsc->layout.tile_mode) {
+ height = align(height, 4);
+ if (prsc->target != PIPE_TEXTURE_CUBE)
+ height = util_next_power_of_two(height);
+ }
+
+ uint32_t nblocksy = util_format_get_nblocksy(format, height);
+
+ slice->offset = size;
+      /* 1d array and 2d array textures must all have the same layer size
+       * for each miplevel on a3xx. 3d textures can have different layer
+       * sizes for high levels, but the hw auto-sizer is buggy (or at least
+       * different from what this code does), so as soon as the layer size
+       * gets into range, we stop reducing it.
+       */
+ if (prsc->target == PIPE_TEXTURE_3D &&
+ (level == 1 ||
+ (level > 1 && fd_resource_slice(rsc, level - 1)->size0 > 0xf000)))
+ slice->size0 = align(nblocksy * pitch, alignment);
+ else if (level == 0 || alignment == 1)
+ slice->size0 = align(nblocksy * pitch, alignment);
+ else
+ slice->size0 = fd_resource_slice(rsc, level - 1)->size0;
+
+ size += slice->size0 * u_minify(prsc->depth0, level) * prsc->array_size;
+ }
+
+ return size;
}
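
To make the 3D layer-size rule above concrete: levels 0 and 1 are always sized from their own pitch, higher levels are re-sized only while the previous level's size0 is still above 0xf000, and after that the previous size0 is simply copied. A standalone sketch with hypothetical per-level byte sizes (nblocksy * pitch) and the 4096-byte array alignment:

#include <stdio.h>

#define ALIGN(v, a) (((v) + (a)-1) & ~((a)-1))

int
main(void)
{
   /* hypothetical per-level byte sizes (nblocksy * pitch) for a 3D tex */
   unsigned raw[] = {0x40000, 0x10000, 0x4000, 0x1000, 0x400};
   unsigned size0[5];

   for (unsigned level = 0; level < 5; level++) {
      if (level <= 1 || size0[level - 1] > 0xf000)
         size0[level] = ALIGN(raw[level], 4096); /* re-size this level */
      else
         size0[level] = size0[level - 1];        /* frozen once in range */
      printf("level %u: size0=0x%x\n", level, size0[level]);
   }
   return 0;
}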
uint32_t
fd3_setup_slices(struct fd_resource *rsc)
{
- uint32_t alignment;
-
- switch (rsc->b.b.target) {
- case PIPE_TEXTURE_3D:
- case PIPE_TEXTURE_1D_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- alignment = 4096;
- break;
- default:
- alignment = 1;
- break;
- }
-
- return setup_slices(rsc, alignment, rsc->b.b.format);
+ uint32_t alignment;
+
+ switch (rsc->b.b.target) {
+ case PIPE_TEXTURE_3D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ alignment = 4096;
+ break;
+ default:
+ alignment = 1;
+ break;
+ }
+
+ return setup_slices(rsc, alignment, rsc->b.b.format);
}
static bool
ok_format(enum pipe_format pfmt)
{
- enum a3xx_color_fmt fmt = fd3_pipe2color(pfmt);
+ enum a3xx_color_fmt fmt = fd3_pipe2color(pfmt);
- if (fmt == RB_NONE)
- return false;
+ if (fmt == RB_NONE)
+ return false;
- switch (pfmt) {
- case PIPE_FORMAT_R8_UINT:
- case PIPE_FORMAT_R8_SINT:
- case PIPE_FORMAT_Z32_FLOAT:
- return false;
- default:
- break;
- }
+ switch (pfmt) {
+ case PIPE_FORMAT_R8_UINT:
+ case PIPE_FORMAT_R8_SINT:
+ case PIPE_FORMAT_Z32_FLOAT:
+ return false;
+ default:
+ break;
+ }
- return true;
+ return true;
}
unsigned
fd3_tile_mode(const struct pipe_resource *tmpl)
{
- if (ok_format(tmpl->format))
- return TILE_4X4;
- return LINEAR;
+ if (ok_format(tmpl->format))
+ return TILE_4X4;
+ return LINEAR;
}
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
-#include "fd3_screen.h"
#include "fd3_context.h"
-#include "fd3_format.h"
#include "fd3_emit.h"
+#include "fd3_format.h"
#include "fd3_resource.h"
+#include "fd3_screen.h"
#include "ir3/ir3_compiler.h"
static bool
fd3_screen_is_format_supported(struct pipe_screen *pscreen,
- enum pipe_format format,
- enum pipe_texture_target target,
- unsigned sample_count,
- unsigned storage_sample_count,
- unsigned usage)
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned storage_sample_count, unsigned usage)
{
- unsigned retval = 0;
+ unsigned retval = 0;
- if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
- (sample_count > 1)) { /* TODO add MSAA */
- DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
- util_format_name(format), target, sample_count, usage);
- return false;
- }
+ if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+ (sample_count > 1)) { /* TODO add MSAA */
+ DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+ util_format_name(format), target, sample_count, usage);
+ return false;
+ }
- if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
- return false;
+ if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
+ return false;
- if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
- (fd3_pipe2vtx(format) != VFMT_NONE)) {
- retval |= PIPE_BIND_VERTEX_BUFFER;
- }
+ if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
+ (fd3_pipe2vtx(format) != VFMT_NONE)) {
+ retval |= PIPE_BIND_VERTEX_BUFFER;
+ }
- if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
- (fd3_pipe2tex(format) != TFMT_NONE)) {
- retval |= PIPE_BIND_SAMPLER_VIEW;
- }
+ if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+ (fd3_pipe2tex(format) != TFMT_NONE)) {
+ retval |= PIPE_BIND_SAMPLER_VIEW;
+ }
- if ((usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED |
- PIPE_BIND_BLENDABLE)) &&
- (fd3_pipe2color(format) != RB_NONE) &&
- (fd3_pipe2tex(format) != TFMT_NONE)) {
- retval |= usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED);
- if (!util_format_is_pure_integer(format))
- retval |= usage & PIPE_BIND_BLENDABLE;
- }
+ if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) &&
+ (fd3_pipe2color(format) != RB_NONE) &&
+ (fd3_pipe2tex(format) != TFMT_NONE)) {
+ retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT | PIPE_BIND_SHARED);
+ if (!util_format_is_pure_integer(format))
+ retval |= usage & PIPE_BIND_BLENDABLE;
+ }
- if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
- (fd_pipe2depth(format) != (enum adreno_rb_depth_format)~0) &&
- (fd3_pipe2tex(format) != TFMT_NONE)) {
- retval |= PIPE_BIND_DEPTH_STENCIL;
- }
+ if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+ (fd_pipe2depth(format) != (enum adreno_rb_depth_format) ~0) &&
+ (fd3_pipe2tex(format) != TFMT_NONE)) {
+ retval |= PIPE_BIND_DEPTH_STENCIL;
+ }
- if ((usage & PIPE_BIND_INDEX_BUFFER) &&
- (fd_pipe2index(format) != (enum pc_di_index_size)~0)) {
- retval |= PIPE_BIND_INDEX_BUFFER;
- }
+ if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+ (fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
+ retval |= PIPE_BIND_INDEX_BUFFER;
+ }
- if (retval != usage) {
- DBG("not supported: format=%s, target=%d, sample_count=%d, "
- "usage=%x, retval=%x", util_format_name(format),
- target, sample_count, usage, retval);
- }
+ if (retval != usage) {
+ DBG("not supported: format=%s, target=%d, sample_count=%d, "
+ "usage=%x, retval=%x",
+ util_format_name(format), target, sample_count, usage, retval);
+ }
- return retval == usage;
+ return retval == usage;
}
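
The function above uses an accumulate-and-compare idiom: each requested bind flag is OR'd into retval only when the relevant hw format table has an encoding for it, and the format is supported iff retval == usage, i.e. every requested usage was matched. A minimal sketch of the same idiom with hypothetical flags and capability booleans:

#include <stdbool.h>
#include <stdio.h>

/* hypothetical bind flags */
#define BIND_VERTEX  0x1
#define BIND_SAMPLER 0x2
#define BIND_RENDER  0x4

static bool
is_supported(unsigned usage, bool has_vtx, bool has_tex, bool has_rb)
{
   unsigned retval = 0;

   if ((usage & BIND_VERTEX) && has_vtx)
      retval |= BIND_VERTEX;
   if ((usage & BIND_SAMPLER) && has_tex)
      retval |= BIND_SAMPLER;
   if ((usage & BIND_RENDER) && has_rb && has_tex)
      retval |= BIND_RENDER;

   return retval == usage; /* all requested flags matched */
}

int
main(void)
{
   /* e.g. sampleable but not renderable: the combined request fails */
   printf("%d\n", is_supported(BIND_SAMPLER, true, true, false)); /* 1 */
   printf("%d\n",
          is_supported(BIND_SAMPLER | BIND_RENDER, true, true, false)); /* 0 */
   return 0;
}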
void
fd3_screen_init(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
- screen->max_rts = A3XX_MAX_RENDER_TARGETS;
- pscreen->context_create = fd3_context_create;
- pscreen->is_format_supported = fd3_screen_is_format_supported;
- fd3_emit_init_screen(pscreen);
- ir3_screen_init(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
+ screen->max_rts = A3XX_MAX_RENDER_TARGETS;
+ pscreen->context_create = fd3_context_create;
+ pscreen->is_format_supported = fd3_screen_is_format_supported;
+ fd3_emit_init_screen(pscreen);
+ ir3_screen_init(pscreen);
- screen->setup_slices = fd3_setup_slices;
- if (FD_DBG(TTILE))
- screen->tile_mode = fd3_tile_mode;
+ screen->setup_slices = fd3_setup_slices;
+ if (FD_DBG(TTILE))
+ screen->tile_mode = fd3_tile_mode;
}
*/
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd3_texture.h"
#include "fd3_format.h"
+#include "fd3_texture.h"
static enum a3xx_tex_clamp
tex_clamp(unsigned wrap, bool *needs_border)
{
- switch (wrap) {
- case PIPE_TEX_WRAP_REPEAT:
- return A3XX_TEX_REPEAT;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return A3XX_TEX_CLAMP_TO_EDGE;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- *needs_border = true;
- return A3XX_TEX_CLAMP_TO_BORDER;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- /* only works for PoT.. need to emulate otherwise! */
- return A3XX_TEX_MIRROR_CLAMP;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- return A3XX_TEX_MIRROR_REPEAT;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- /* these two we could perhaps emulate, but we currently
- * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
- */
- default:
- DBG("invalid wrap: %u", wrap);
- return 0;
- }
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return A3XX_TEX_REPEAT;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return A3XX_TEX_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ *needs_border = true;
+ return A3XX_TEX_CLAMP_TO_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ /* only works for PoT.. need to emulate otherwise! */
+ return A3XX_TEX_MIRROR_CLAMP;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return A3XX_TEX_MIRROR_REPEAT;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ /* these two we could perhaps emulate, but we currently
+ * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
+ */
+ default:
+ DBG("invalid wrap: %u", wrap);
+ return 0;
+ }
}
static enum a3xx_tex_filter
tex_filter(unsigned filter, bool aniso)
{
- switch (filter) {
- case PIPE_TEX_FILTER_NEAREST:
- return A3XX_TEX_NEAREST;
- case PIPE_TEX_FILTER_LINEAR:
- return aniso ? A3XX_TEX_ANISO : A3XX_TEX_LINEAR;
- default:
- DBG("invalid filter: %u", filter);
- return 0;
- }
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ return A3XX_TEX_NEAREST;
+ case PIPE_TEX_FILTER_LINEAR:
+ return aniso ? A3XX_TEX_ANISO : A3XX_TEX_LINEAR;
+ default:
+ DBG("invalid filter: %u", filter);
+ return 0;
+ }
}
static void *
fd3_sampler_state_create(struct pipe_context *pctx,
- const struct pipe_sampler_state *cso)
+ const struct pipe_sampler_state *cso)
{
- struct fd3_sampler_stateobj *so = CALLOC_STRUCT(fd3_sampler_stateobj);
- unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
- bool miplinear = false;
-
- if (!so)
- return NULL;
-
- if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
- miplinear = true;
-
- so->base = *cso;
-
- so->needs_border = false;
- so->texsamp0 =
- COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
- COND(!cso->seamless_cube_map, A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF) |
- COND(miplinear, A3XX_TEX_SAMP_0_MIPFILTER_LINEAR) |
- A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
- A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
- A3XX_TEX_SAMP_0_ANISO(aniso) |
- A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
- A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
- A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
-
- if (cso->compare_mode)
- so->texsamp0 |= A3XX_TEX_SAMP_0_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
-
- so->texsamp1 = A3XX_TEX_SAMP_1_LOD_BIAS(cso->lod_bias);
-
- if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
- so->texsamp1 |=
- A3XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
- A3XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
- } else {
- /* If we're not doing mipmap filtering, we still need a slightly > 0
- * LOD clamp so the HW can decide between min and mag filtering of
- * level 0.
- */
- so->texsamp1 |=
- A3XX_TEX_SAMP_1_MIN_LOD(MIN2(cso->min_lod, 0.125)) |
- A3XX_TEX_SAMP_1_MAX_LOD(MIN2(cso->max_lod, 0.125));
- }
-
- return so;
+ struct fd3_sampler_stateobj *so = CALLOC_STRUCT(fd3_sampler_stateobj);
+ unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
+ bool miplinear = false;
+
+ if (!so)
+ return NULL;
+
+ if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
+ miplinear = true;
+
+ so->base = *cso;
+
+ so->needs_border = false;
+ so->texsamp0 =
+ COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
+ COND(!cso->seamless_cube_map, A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF) |
+ COND(miplinear, A3XX_TEX_SAMP_0_MIPFILTER_LINEAR) |
+ A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
+ A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
+ A3XX_TEX_SAMP_0_ANISO(aniso) |
+ A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
+ A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
+ A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
+
+ if (cso->compare_mode)
+ so->texsamp0 |=
+ A3XX_TEX_SAMP_0_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
+
+ so->texsamp1 = A3XX_TEX_SAMP_1_LOD_BIAS(cso->lod_bias);
+
+ if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ so->texsamp1 |= A3XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
+ A3XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
+ } else {
+ /* If we're not doing mipmap filtering, we still need a slightly > 0
+ * LOD clamp so the HW can decide between min and mag filtering of
+ * level 0.
+ */
+ so->texsamp1 |= A3XX_TEX_SAMP_1_MIN_LOD(MIN2(cso->min_lod, 0.125)) |
+ A3XX_TEX_SAMP_1_MAX_LOD(MIN2(cso->max_lod, 0.125));
+ }
+
+ return so;
}
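
The else-branch above implements the comment literally: with mip filtering off, both LOD clamps are limited to at most 0.125 so the hw still has a small positive range in which to choose between the min and mag filters for level 0. A quick sketch of the resulting clamp values for typical default sampler LODs:

#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int
main(void)
{
   float min_lod = 0.0f, max_lod = 15.0f; /* typical default sampler LODs */

   /* mipfilter == NONE: clamp both ends to a tiny range around level 0 */
   printf("min=%f max=%f\n", MIN2(min_lod, 0.125), MIN2(max_lod, 0.125));
   return 0;
}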
static enum a3xx_tex_type
tex_type(unsigned target)
{
- switch (target) {
- default:
- assert(0);
- case PIPE_BUFFER:
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return A3XX_TEX_1D;
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_2D_ARRAY:
- return A3XX_TEX_2D;
- case PIPE_TEXTURE_3D:
- return A3XX_TEX_3D;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return A3XX_TEX_CUBE;
- }
+ switch (target) {
+ default:
+ assert(0);
+ case PIPE_BUFFER:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return A3XX_TEX_1D;
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return A3XX_TEX_2D;
+ case PIPE_TEXTURE_3D:
+ return A3XX_TEX_3D;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return A3XX_TEX_CUBE;
+ }
}
static struct pipe_sampler_view *
fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
- const struct pipe_sampler_view *cso)
+ const struct pipe_sampler_view *cso)
{
- struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
- struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl;
-
- if (!so)
- return NULL;
-
- so->base = *cso;
- pipe_reference(NULL, &prsc->reference);
- so->base.texture = prsc;
- so->base.reference.count = 1;
- so->base.context = pctx;
-
- so->texconst0 =
- A3XX_TEX_CONST_0_TILE_MODE(rsc->layout.tile_mode) |
- A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
- A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
- fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
- cso->swizzle_b, cso->swizzle_a);
-
- if (prsc->target == PIPE_BUFFER || util_format_is_pure_integer(cso->format))
- so->texconst0 |= A3XX_TEX_CONST_0_NOCONVERT;
- if (util_format_is_srgb(cso->format))
- so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
-
- if (prsc->target == PIPE_BUFFER) {
- lvl = 0;
- so->texconst1 =
- A3XX_TEX_CONST_1_WIDTH(cso->u.buf.size / util_format_get_blocksize(cso->format)) |
- A3XX_TEX_CONST_1_HEIGHT(1);
- } else {
- unsigned miplevels;
-
- lvl = fd_sampler_first_level(cso);
- miplevels = fd_sampler_last_level(cso) - lvl;
-
- so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels);
- so->texconst1 =
- A3XX_TEX_CONST_1_PITCHALIGN(rsc->layout.pitchalign - 4) |
- A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
- A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
- }
- /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
- struct fdl_slice *slice = fd_resource_slice(rsc, lvl);
- so->texconst2 =
- A3XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
- switch (prsc->target) {
- case PIPE_TEXTURE_1D_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- so->texconst3 =
- A3XX_TEX_CONST_3_DEPTH(prsc->array_size - 1) |
- A3XX_TEX_CONST_3_LAYERSZ1(slice->size0);
- break;
- case PIPE_TEXTURE_3D:
- so->texconst3 =
- A3XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
- A3XX_TEX_CONST_3_LAYERSZ1(slice->size0);
- so->texconst3 |= A3XX_TEX_CONST_3_LAYERSZ2(
- fd_resource_slice(rsc, prsc->last_level)->size0);
- break;
- default:
- so->texconst3 = 0x00000000;
- break;
- }
-
- return &so->base;
+ struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
+ struct fd_resource *rsc = fd_resource(prsc);
+ unsigned lvl;
+
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+ pipe_reference(NULL, &prsc->reference);
+ so->base.texture = prsc;
+ so->base.reference.count = 1;
+ so->base.context = pctx;
+
+ so->texconst0 = A3XX_TEX_CONST_0_TILE_MODE(rsc->layout.tile_mode) |
+ A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
+ A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
+ fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
+ cso->swizzle_b, cso->swizzle_a);
+
+ if (prsc->target == PIPE_BUFFER || util_format_is_pure_integer(cso->format))
+ so->texconst0 |= A3XX_TEX_CONST_0_NOCONVERT;
+ if (util_format_is_srgb(cso->format))
+ so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
+
+ if (prsc->target == PIPE_BUFFER) {
+ lvl = 0;
+ so->texconst1 =
+ A3XX_TEX_CONST_1_WIDTH(cso->u.buf.size /
+ util_format_get_blocksize(cso->format)) |
+ A3XX_TEX_CONST_1_HEIGHT(1);
+ } else {
+ unsigned miplevels;
+
+ lvl = fd_sampler_first_level(cso);
+ miplevels = fd_sampler_last_level(cso) - lvl;
+
+ so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels);
+ so->texconst1 = A3XX_TEX_CONST_1_PITCHALIGN(rsc->layout.pitchalign - 4) |
+ A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+ A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+ }
+ /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
+ struct fdl_slice *slice = fd_resource_slice(rsc, lvl);
+ so->texconst2 = A3XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
+ switch (prsc->target) {
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ so->texconst3 = A3XX_TEX_CONST_3_DEPTH(prsc->array_size - 1) |
+ A3XX_TEX_CONST_3_LAYERSZ1(slice->size0);
+ break;
+ case PIPE_TEXTURE_3D:
+ so->texconst3 = A3XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
+ A3XX_TEX_CONST_3_LAYERSZ1(slice->size0);
+ so->texconst3 |= A3XX_TEX_CONST_3_LAYERSZ2(
+ fd_resource_slice(rsc, prsc->last_level)->size0);
+ break;
+ default:
+ so->texconst3 = 0x00000000;
+ break;
+ }
+
+ return &so->base;
}
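
The width/height programming above leans on u_minify(); for reference, its behaviour is essentially the following sketch (worked example in the comment):

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Each mip level halves a dimension, clamped to 1: a 100x30 level-0
 * texture viewed with first_level = 2 is programmed as 25x7 here, and
 * MIPLVLS gets last_level - first_level.
 */
static unsigned
minify(unsigned sz, unsigned lvl)
{
   return MAX2(sz >> lvl, 1);
}
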
void
fd3_texture_init(struct pipe_context *pctx)
{
- pctx->create_sampler_state = fd3_sampler_state_create;
- pctx->bind_sampler_states = fd_sampler_states_bind;
- pctx->create_sampler_view = fd3_sampler_view_create;
- pctx->set_sampler_views = fd_set_sampler_views;
+ pctx->create_sampler_state = fd3_sampler_state_create;
+ pctx->bind_sampler_states = fd_sampler_states_bind;
+ pctx->create_sampler_view = fd3_sampler_view_create;
+ pctx->set_sampler_views = fd_set_sampler_views;
}
#include "pipe/p_context.h"
-#include "freedreno_texture.h"
#include "freedreno_resource.h"
+#include "freedreno_texture.h"
#include "fd3_context.h"
#include "fd3_format.h"
struct fd3_sampler_stateobj {
- struct pipe_sampler_state base;
- uint32_t texsamp0, texsamp1;
- bool needs_border;
+ struct pipe_sampler_state base;
+ uint32_t texsamp0, texsamp1;
+ bool needs_border;
};
static inline struct fd3_sampler_stateobj *
fd3_sampler_stateobj(struct pipe_sampler_state *samp)
{
- return (struct fd3_sampler_stateobj *)samp;
+ return (struct fd3_sampler_stateobj *)samp;
}
struct fd3_pipe_sampler_view {
- struct pipe_sampler_view base;
- uint32_t texconst0, texconst1, texconst2, texconst3;
+ struct pipe_sampler_view base;
+ uint32_t texconst0, texconst1, texconst2, texconst3;
};
static inline struct fd3_pipe_sampler_view *
fd3_pipe_sampler_view(struct pipe_sampler_view *pview)
{
- return (struct fd3_pipe_sampler_view *)pview;
+ return (struct fd3_pipe_sampler_view *)pview;
}
unsigned fd3_get_const_idx(struct fd_context *ctx,
- struct fd_texture_stateobj *tex, unsigned samp_id);
+ struct fd_texture_stateobj *tex, unsigned samp_id);
void fd3_texture_init(struct pipe_context *pctx);
-
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd3_zsa.h"
#include "fd3_context.h"
#include "fd3_format.h"
+#include "fd3_zsa.h"
void *
fd3_zsa_state_create(struct pipe_context *pctx,
- const struct pipe_depth_stencil_alpha_state *cso)
+ const struct pipe_depth_stencil_alpha_state *cso)
{
- struct fd3_zsa_stateobj *so;
+ struct fd3_zsa_stateobj *so;
- so = CALLOC_STRUCT(fd3_zsa_stateobj);
- if (!so)
- return NULL;
+ so = CALLOC_STRUCT(fd3_zsa_stateobj);
+ if (!so)
+ return NULL;
- so->base = *cso;
+ so->base = *cso;
- so->rb_depth_control |=
- A3XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
+ so->rb_depth_control |=
+ A3XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
- if (cso->depth_enabled)
- so->rb_depth_control |=
- A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
- A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
+ if (cso->depth_enabled)
+ so->rb_depth_control |=
+ A3XX_RB_DEPTH_CONTROL_Z_ENABLE | A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
- if (cso->depth_writemask)
- so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
+ if (cso->depth_writemask)
+ so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
- if (cso->stencil[0].enabled) {
- const struct pipe_stencil_state *s = &cso->stencil[0];
+ if (cso->stencil[0].enabled) {
+ const struct pipe_stencil_state *s = &cso->stencil[0];
- so->rb_stencil_control |=
- A3XX_RB_STENCIL_CONTROL_STENCIL_READ |
- A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
- A3XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
- A3XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
- A3XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
- A3XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
- so->rb_stencilrefmask |=
- 0xff000000 | /* ??? */
- A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
- A3XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
+ so->rb_stencil_control |=
+ A3XX_RB_STENCIL_CONTROL_STENCIL_READ |
+ A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+ A3XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
+ A3XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
+ so->rb_stencilrefmask |=
+ 0xff000000 | /* ??? */
+ A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
+ A3XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
- if (cso->stencil[1].enabled) {
- const struct pipe_stencil_state *bs = &cso->stencil[1];
+ if (cso->stencil[1].enabled) {
+ const struct pipe_stencil_state *bs = &cso->stencil[1];
- so->rb_stencil_control |=
- A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
- A3XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
- A3XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
- A3XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
- A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
- so->rb_stencilrefmask_bf |=
- 0xff000000 | /* ??? */
- A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
- A3XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
- }
- }
+ so->rb_stencil_control |=
+ A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+ A3XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
+ A3XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
+ so->rb_stencilrefmask_bf |=
+ 0xff000000 | /* ??? */
+ A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
+ A3XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
+ }
+ }
- if (cso->alpha_enabled) {
- so->rb_render_control =
- A3XX_RB_RENDER_CONTROL_ALPHA_TEST |
- A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
- so->rb_alpha_ref =
- A3XX_RB_ALPHA_REF_UINT(cso->alpha_ref_value * 255.0) |
- A3XX_RB_ALPHA_REF_FLOAT(cso->alpha_ref_value);
- so->rb_depth_control |=
- A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
- }
+ if (cso->alpha_enabled) {
+ so->rb_render_control =
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST |
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
+ so->rb_alpha_ref = A3XX_RB_ALPHA_REF_UINT(cso->alpha_ref_value * 255.0) |
+ A3XX_RB_ALPHA_REF_FLOAT(cso->alpha_ref_value);
+ so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+ }
- return so;
+ return so;
}
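
Note how the alpha reference above is written in two encodings at once. A standalone sketch of the unorm half (the clamping here is an assumption; the driver itself relies on the A3XX_RB_ALPHA_REF_UINT field macro):

#include <stdint.h>

/* RB_ALPHA_REF carries the same reference twice: an 8-bit unorm field
 * for unorm render targets and a float field for float targets.
 * ref = 0.5 packs as UINT = 127 here.
 */
static uint8_t
alpha_ref_unorm8(float ref)
{
   float v = ref * 255.0f;
   if (v < 0.0f)
      v = 0.0f;
   if (v > 255.0f)
      v = 255.0f;
   return (uint8_t)v;
}
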
#ifndef FD3_ZSA_H_
#define FD3_ZSA_H_
-
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd3_zsa_stateobj {
- struct pipe_depth_stencil_alpha_state base;
- uint32_t rb_render_control;
- uint32_t rb_alpha_ref;
- uint32_t rb_depth_control;
- uint32_t rb_stencil_control;
- uint32_t rb_stencilrefmask;
- uint32_t rb_stencilrefmask_bf;
+ struct pipe_depth_stencil_alpha_state base;
+ uint32_t rb_render_control;
+ uint32_t rb_alpha_ref;
+ uint32_t rb_depth_control;
+ uint32_t rb_stencil_control;
+ uint32_t rb_stencilrefmask;
+ uint32_t rb_stencilrefmask_bf;
};
static inline struct fd3_zsa_stateobj *
fd3_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
- return (struct fd3_zsa_stateobj *)zsa;
+ return (struct fd3_zsa_stateobj *)zsa;
}
-void * fd3_zsa_state_create(struct pipe_context *pctx,
- const struct pipe_depth_stencil_alpha_state *cso);
+void *fd3_zsa_state_create(struct pipe_context *pctx,
+ const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD3_ZSA_H_ */
#include "pipe/p_state.h"
#include "util/u_blend.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
#include "fd4_blend.h"
#include "fd4_context.h"
static enum a3xx_rb_blend_opcode
blend_func(unsigned func)
{
- switch (func) {
- case PIPE_BLEND_ADD:
- return BLEND_DST_PLUS_SRC;
- case PIPE_BLEND_MIN:
- return BLEND_MIN_DST_SRC;
- case PIPE_BLEND_MAX:
- return BLEND_MAX_DST_SRC;
- case PIPE_BLEND_SUBTRACT:
- return BLEND_SRC_MINUS_DST;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return BLEND_DST_MINUS_SRC;
- default:
- DBG("invalid blend func: %x", func);
- return 0;
- }
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return BLEND_DST_PLUS_SRC;
+ case PIPE_BLEND_MIN:
+ return BLEND_MIN_DST_SRC;
+ case PIPE_BLEND_MAX:
+ return BLEND_MAX_DST_SRC;
+ case PIPE_BLEND_SUBTRACT:
+ return BLEND_SRC_MINUS_DST;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return BLEND_DST_MINUS_SRC;
+ default:
+ DBG("invalid blend func: %x", func);
+ return 0;
+ }
}
void *
fd4_blend_state_create(struct pipe_context *pctx,
- const struct pipe_blend_state *cso)
+ const struct pipe_blend_state *cso)
{
- struct fd4_blend_stateobj *so;
- enum a3xx_rop_code rop = ROP_COPY;
- bool reads_dest = false;
- unsigned i, mrt_blend = 0;
-
- if (cso->logicop_enable) {
- rop = cso->logicop_func; /* maps 1:1 */
- reads_dest = util_logicop_reads_dest(cso->logicop_func);
- }
-
- so = CALLOC_STRUCT(fd4_blend_stateobj);
- if (!so)
- return NULL;
-
- so->base = *cso;
-
- for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
- const struct pipe_rt_blend_state *rt;
-
- if (cso->independent_blend_enable)
- rt = &cso->rt[i];
- else
- rt = &cso->rt[0];
-
- so->rb_mrt[i].blend_control =
- A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
- A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
- A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
- A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
- A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
- A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
-
- so->rb_mrt[i].control =
- A4XX_RB_MRT_CONTROL_ROP_CODE(rop) |
- COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) |
- A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
-
- if (rt->blend_enable) {
- so->rb_mrt[i].control |=
- A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
- A4XX_RB_MRT_CONTROL_BLEND |
- A4XX_RB_MRT_CONTROL_BLEND2;
- mrt_blend |= (1 << i);
- }
-
- if (reads_dest) {
- so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
- mrt_blend |= (1 << i);
- }
-
- if (cso->dither)
- so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
- }
-
- so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) |
- COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND);
-
- return so;
+ struct fd4_blend_stateobj *so;
+ enum a3xx_rop_code rop = ROP_COPY;
+ bool reads_dest = false;
+ unsigned i, mrt_blend = 0;
+
+ if (cso->logicop_enable) {
+ rop = cso->logicop_func; /* maps 1:1 */
+ reads_dest = util_logicop_reads_dest(cso->logicop_func);
+ }
+
+ so = CALLOC_STRUCT(fd4_blend_stateobj);
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
+ const struct pipe_rt_blend_state *rt;
+
+ if (cso->independent_blend_enable)
+ rt = &cso->rt[i];
+ else
+ rt = &cso->rt[0];
+
+ so->rb_mrt[i].blend_control =
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(
+ fd_blend_factor(rt->rgb_src_factor)) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(
+ fd_blend_factor(rt->rgb_dst_factor)) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(
+ fd_blend_factor(rt->alpha_src_factor)) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(
+ blend_func(rt->alpha_func)) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(
+ fd_blend_factor(rt->alpha_dst_factor));
+
+ so->rb_mrt[i].control =
+ A4XX_RB_MRT_CONTROL_ROP_CODE(rop) |
+ COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) |
+ A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
+
+ if (rt->blend_enable) {
+ so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
+ A4XX_RB_MRT_CONTROL_BLEND |
+ A4XX_RB_MRT_CONTROL_BLEND2;
+ mrt_blend |= (1 << i);
+ }
+
+ if (reads_dest) {
+ so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
+ mrt_blend |= (1 << i);
+ }
+
+ if (cso->dither)
+ so->rb_mrt[i].buf_info |=
+ A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
+ }
+
+ so->rb_fs_output =
+ A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) |
+ COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND);
+
+ return so;
}
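
A compact way to see what feeds RB_FS_OUTPUT_ENABLE_BLEND above: one bit per MRT that needs the destination, either via blending or via a dst-reading ROP. A hypothetical helper mirroring the loop above:

#include <stdbool.h>
#include <stdint.h>

static uint32_t
mrt_read_dest_mask(unsigned num_rts, const bool *blend_enable,
                   bool logicop_reads_dest)
{
   uint32_t mask = 0;
   for (unsigned i = 0; i < num_rts; i++) {
      /* ROP_COPY does not read dst; e.g. ROP_XOR does */
      if (blend_enable[i] || logicop_reads_dest)
         mask |= 1u << i;
   }
   return mask;
}
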
#ifndef FD4_BLEND_H_
#define FD4_BLEND_H_
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd4_blend_stateobj {
- struct pipe_blend_state base;
- struct {
- uint32_t control;
- uint32_t buf_info;
- uint32_t blend_control;
- } rb_mrt[A4XX_MAX_RENDER_TARGETS];
- uint32_t rb_fs_output;
+ struct pipe_blend_state base;
+ struct {
+ uint32_t control;
+ uint32_t buf_info;
+ uint32_t blend_control;
+ } rb_mrt[A4XX_MAX_RENDER_TARGETS];
+ uint32_t rb_fs_output;
};
static inline struct fd4_blend_stateobj *
fd4_blend_stateobj(struct pipe_blend_state *blend)
{
- return (struct fd4_blend_stateobj *)blend;
+ return (struct fd4_blend_stateobj *)blend;
}
-void * fd4_blend_state_create(struct pipe_context *pctx,
- const struct pipe_blend_state *cso);
+void *fd4_blend_state_create(struct pipe_context *pctx,
+ const struct pipe_blend_state *cso);
#endif /* FD4_BLEND_H_ */
#include "freedreno_query_hw.h"
-#include "fd4_context.h"
#include "fd4_blend.h"
+#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_emit.h"
#include "fd4_gmem.h"
#include "fd4_zsa.h"
static void
-fd4_context_destroy(struct pipe_context *pctx)
- in_dt
+fd4_context_destroy(struct pipe_context *pctx) in_dt
{
- struct fd4_context *fd4_ctx = fd4_context(fd_context(pctx));
+ struct fd4_context *fd4_ctx = fd4_context(fd_context(pctx));
- u_upload_destroy(fd4_ctx->border_color_uploader);
- pipe_resource_reference(&fd4_ctx->border_color_buf, NULL);
+ u_upload_destroy(fd4_ctx->border_color_uploader);
+ pipe_resource_reference(&fd4_ctx->border_color_buf, NULL);
- fd_context_destroy(pctx);
+ fd_context_destroy(pctx);
- fd_bo_del(fd4_ctx->vs_pvt_mem);
- fd_bo_del(fd4_ctx->fs_pvt_mem);
- fd_bo_del(fd4_ctx->vsc_size_mem);
+ fd_bo_del(fd4_ctx->vs_pvt_mem);
+ fd_bo_del(fd4_ctx->fs_pvt_mem);
+ fd_bo_del(fd4_ctx->vsc_size_mem);
- fd_context_cleanup_common_vbos(&fd4_ctx->base);
+ fd_context_cleanup_common_vbos(&fd4_ctx->base);
- fd_hw_query_fini(pctx);
+ fd_hw_query_fini(pctx);
- free(fd4_ctx);
+ free(fd4_ctx);
}
/* clang-format off */
/* clang-format on */
struct pipe_context *
-fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
- in_dt
+fd4_context_create(struct pipe_screen *pscreen, void *priv,
+ unsigned flags) in_dt
{
- struct fd_screen *screen = fd_screen(pscreen);
- struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context);
- struct pipe_context *pctx;
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context);
+ struct pipe_context *pctx;
- if (!fd4_ctx)
- return NULL;
+ if (!fd4_ctx)
+ return NULL;
- pctx = &fd4_ctx->base.base;
- pctx->screen = pscreen;
+ pctx = &fd4_ctx->base.base;
+ pctx->screen = pscreen;
- fd4_ctx->base.dev = fd_device_ref(screen->dev);
- fd4_ctx->base.screen = fd_screen(pscreen);
- fd4_ctx->base.last.key = &fd4_ctx->last_key;
+ fd4_ctx->base.dev = fd_device_ref(screen->dev);
+ fd4_ctx->base.screen = fd_screen(pscreen);
+ fd4_ctx->base.last.key = &fd4_ctx->last_key;
- pctx->destroy = fd4_context_destroy;
- pctx->create_blend_state = fd4_blend_state_create;
- pctx->create_rasterizer_state = fd4_rasterizer_state_create;
- pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create;
+ pctx->destroy = fd4_context_destroy;
+ pctx->create_blend_state = fd4_blend_state_create;
+ pctx->create_rasterizer_state = fd4_rasterizer_state_create;
+ pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create;
- fd4_draw_init(pctx);
- fd4_gmem_init(pctx);
- fd4_texture_init(pctx);
- fd4_prog_init(pctx);
- fd4_emit_init(pctx);
+ fd4_draw_init(pctx);
+ fd4_gmem_init(pctx);
+ fd4_texture_init(pctx);
+ fd4_prog_init(pctx);
+ fd4_emit_init(pctx);
- pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv, flags);
- if (!pctx)
- return NULL;
+ pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv, flags);
+ if (!pctx)
+ return NULL;
- fd_hw_query_init(pctx);
+ fd_hw_query_init(pctx);
- fd4_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt");
+ fd4_ctx->vs_pvt_mem =
+ fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt");
- fd4_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt");
+ fd4_ctx->fs_pvt_mem =
+ fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt");
- fd4_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
+ fd4_ctx->vsc_size_mem =
+ fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
- fd_context_setup_common_vbos(&fd4_ctx->base);
+ fd_context_setup_common_vbos(&fd4_ctx->base);
- fd4_query_context_init(pctx);
+ fd4_query_context_init(pctx);
- fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
- PIPE_USAGE_STREAM, 0);
+ fd4_ctx->border_color_uploader =
+ u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
- return pctx;
+ return pctx;
}
#include "ir3/ir3_shader.h"
struct fd4_context {
- struct fd_context base;
+ struct fd_context base;
- struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
+ struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
- /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
- * could combine it with another allocation.
- *
- * (upper area used as scratch bo.. see fd4_query)
- */
- struct fd_bo *vsc_size_mem;
+ /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
+ * could combine it with another allocation.
+ *
+ * (upper area used as scratch bo.. see fd4_query)
+ */
+ struct fd_bo *vsc_size_mem;
- struct u_upload_mgr *border_color_uploader;
- struct pipe_resource *border_color_buf;
+ struct u_upload_mgr *border_color_uploader;
+ struct pipe_resource *border_color_buf;
- /* bitmask of samplers which need astc srgb workaround: */
- uint16_t vastc_srgb, fastc_srgb;
+ /* bitmask of samplers which need astc srgb workaround: */
+ uint16_t vastc_srgb, fastc_srgb;
- /* storage for ctx->last.key: */
- struct ir3_shader_key last_key;
+ /* storage for ctx->last.key: */
+ struct ir3_shader_key last_key;
};
static inline struct fd4_context *
fd4_context(struct fd_context *ctx)
{
- return (struct fd4_context *)ctx;
+ return (struct fd4_context *)ctx;
}
-struct pipe_context *
-fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
+struct pipe_context *fd4_context_create(struct pipe_screen *pscreen, void *priv,
+ unsigned flags);
#endif /* FD4_CONTEXT_H_ */
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
+#include "util/u_string.h"
-#include "freedreno_state.h"
#include "freedreno_resource.h"
+#include "freedreno_state.h"
-#include "fd4_draw.h"
#include "fd4_context.h"
+#include "fd4_draw.h"
#include "fd4_emit.h"
-#include "fd4_program.h"
#include "fd4_format.h"
+#include "fd4_program.h"
#include "fd4_zsa.h"
-
static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd4_emit *emit, unsigned index_offset)
- assert_dt
+ struct fd4_emit *emit, unsigned index_offset) assert_dt
{
- const struct pipe_draw_info *info = emit->info;
- enum pc_di_primtype primtype = ctx->primtypes[info->mode];
+ const struct pipe_draw_info *info = emit->info;
+ enum pc_di_primtype primtype = ctx->primtypes[info->mode];
- fd4_emit_state(ctx, ring, emit);
+ fd4_emit_state(ctx, ring, emit);
- if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
- fd4_emit_vertex_bufs(ring, emit);
+ if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
+ fd4_emit_vertex_bufs(ring, emit);
- OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
- OUT_RING(ring, info->index_size ? info->index_bias : emit->draw->start); /* VFD_INDEX_OFFSET */
- OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */
+ OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
+ OUT_RING(ring, info->index_size ? info->index_bias
+ : emit->draw->start); /* VFD_INDEX_OFFSET */
+ OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */
- OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
- OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
- info->restart_index : 0xffffffff);
+ OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
+ info->restart_index
+ : 0xffffffff);
- /* points + psize -> spritelist: */
- if (ctx->rasterizer->point_size_per_vertex &&
- fd4_emit_get_vp(emit)->writes_psize &&
- (info->mode == PIPE_PRIM_POINTS))
- primtype = DI_PT_POINTLIST_PSIZE;
+ /* points + psize -> spritelist: */
+ if (ctx->rasterizer->point_size_per_vertex &&
+ fd4_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS))
+ primtype = DI_PT_POINTLIST_PSIZE;
- fd4_draw_emit(ctx->batch, ring, primtype,
- emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
- info, emit->indirect, emit->draw, index_offset);
+ fd4_draw_emit(ctx->batch, ring, primtype,
+ emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info,
+ emit->indirect, emit->draw, index_offset);
}
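
The VFD_INDEX_OFFSET write above differs by draw type; a one-line sketch of the selection:

#include <stdbool.h>
#include <stdint.h>

/* indexed draws add index_bias to each fetched index; non-indexed
 * draws add the start vertex directly */
static uint32_t
vfd_index_offset(bool indexed, int32_t index_bias, uint32_t start)
{
   return indexed ? (uint32_t)index_bias : start;
}
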
static bool
fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw,
- unsigned index_offset)
- in_dt
+ unsigned index_offset) in_dt
{
- struct fd4_context *fd4_ctx = fd4_context(ctx);
- struct fd4_emit emit = {
- .debug = &ctx->debug,
- .vtx = &ctx->vtx,
- .info = info,
- .indirect = indirect,
- .draw = draw,
- .key = {
- .vs = ctx->prog.vs,
- .fs = ctx->prog.fs,
- .key = {
- .rasterflat = ctx->rasterizer->flatshade,
- .ucp_enables = ctx->rasterizer->clip_plane_enable,
- .has_per_samp = fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb,
- .vastc_srgb = fd4_ctx->vastc_srgb,
- .fastc_srgb = fd4_ctx->fastc_srgb,
- },
- },
- .rasterflat = ctx->rasterizer->flatshade,
- .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
- .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
- };
-
- if (info->mode != PIPE_PRIM_MAX &&
- !indirect &&
- !info->primitive_restart &&
- !u_trim_pipe_prim(info->mode, (unsigned*)&draw->count))
- return false;
-
- ir3_fixup_shader_state(&ctx->base, &emit.key.key);
-
- enum fd_dirty_3d_state dirty = ctx->dirty;
-
- emit.prog = fd4_program_state(ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
-
- /* bail if compile failed: */
- if (!emit.prog)
- return false;
-
- const struct ir3_shader_variant *vp = fd4_emit_get_vp(&emit);
- const struct ir3_shader_variant *fp = fd4_emit_get_fp(&emit);
-
- ir3_update_max_tf_vtx(ctx, vp);
-
- /* do regular pass first: */
-
- if (unlikely(ctx->stats_users > 0)) {
- ctx->stats.vs_regs += ir3_shader_halfregs(vp);
- ctx->stats.fs_regs += ir3_shader_halfregs(fp);
- }
-
- emit.binning_pass = false;
- emit.dirty = dirty;
-
- struct fd_ringbuffer *ring = ctx->batch->draw;
-
- if (ctx->rasterizer->rasterizer_discard) {
- fd_wfi(ctx->batch, ring);
- OUT_PKT3(ring, CP_REG_RMW, 3);
- OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
- OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
- OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
- }
-
- draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
-
- if (ctx->rasterizer->rasterizer_discard) {
- fd_wfi(ctx->batch, ring);
- OUT_PKT3(ring, CP_REG_RMW, 3);
- OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
- OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
- OUT_RING(ring, 0);
- }
-
- /* and now binning pass: */
- emit.binning_pass = true;
- emit.dirty = dirty & ~(FD_DIRTY_BLEND);
- emit.vs = NULL; /* we changed key so need to refetch vs */
- emit.fs = NULL;
- draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
-
- fd_context_all_clean(ctx);
-
- return true;
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct fd4_emit emit = {
+ .debug = &ctx->debug,
+ .vtx = &ctx->vtx,
+ .info = info,
+ .indirect = indirect,
+ .draw = draw,
+ .key =
+ {
+ .vs = ctx->prog.vs,
+ .fs = ctx->prog.fs,
+ .key =
+ {
+ .rasterflat = ctx->rasterizer->flatshade,
+ .ucp_enables = ctx->rasterizer->clip_plane_enable,
+ .has_per_samp = fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb,
+ .vastc_srgb = fd4_ctx->vastc_srgb,
+ .fastc_srgb = fd4_ctx->fastc_srgb,
+ },
+ },
+ .rasterflat = ctx->rasterizer->flatshade,
+ .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
+ .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
+ };
+
+ if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
+ !u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
+ return false;
+
+ ir3_fixup_shader_state(&ctx->base, &emit.key.key);
+
+ enum fd_dirty_3d_state dirty = ctx->dirty;
+
+ emit.prog = fd4_program_state(
+ ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
+
+ /* bail if compile failed: */
+ if (!emit.prog)
+ return false;
+
+ const struct ir3_shader_variant *vp = fd4_emit_get_vp(&emit);
+ const struct ir3_shader_variant *fp = fd4_emit_get_fp(&emit);
+
+ ir3_update_max_tf_vtx(ctx, vp);
+
+ /* do regular pass first: */
+
+ if (unlikely(ctx->stats_users > 0)) {
+ ctx->stats.vs_regs += ir3_shader_halfregs(vp);
+ ctx->stats.fs_regs += ir3_shader_halfregs(fp);
+ }
+
+ emit.binning_pass = false;
+ emit.dirty = dirty;
+
+ struct fd_ringbuffer *ring = ctx->batch->draw;
+
+ if (ctx->rasterizer->rasterizer_discard) {
+ fd_wfi(ctx->batch, ring);
+ OUT_PKT3(ring, CP_REG_RMW, 3);
+ OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
+ OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
+ OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
+ }
+
+ draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
+
+ if (ctx->rasterizer->rasterizer_discard) {
+ fd_wfi(ctx->batch, ring);
+ OUT_PKT3(ring, CP_REG_RMW, 3);
+ OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
+ OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
+ OUT_RING(ring, 0);
+ }
+
+ /* and now binning pass: */
+ emit.binning_pass = true;
+ emit.dirty = dirty & ~(FD_DIRTY_BLEND);
+ emit.vs = NULL; /* we changed key so need to refetch vs */
+ emit.fs = NULL;
+ draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
+
+ fd_context_all_clean(ctx);
+
+ return true;
}
void
-fd4_draw_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd4_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->draw_vbo = fd4_draw_vbo;
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->draw_vbo = fd4_draw_vbo;
}
/* draw packet changed on a4xx, so cannot reuse one from a2xx/a3xx.. */
-static inline uint32_t DRAW4(enum pc_di_primtype prim_type,
- enum pc_di_src_sel source_select, enum a4xx_index_size index_size,
- enum pc_di_vis_cull_mode vis_cull_mode)
+static inline uint32_t
+DRAW4(enum pc_di_primtype prim_type, enum pc_di_src_sel source_select,
+ enum a4xx_index_size index_size, enum pc_di_vis_cull_mode vis_cull_mode)
{
- return CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(prim_type) |
- CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(source_select) |
- CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) |
- CP_DRAW_INDX_OFFSET_0_VIS_CULL(vis_cull_mode);
+ return CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(prim_type) |
+ CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(source_select) |
+ CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) |
+ CP_DRAW_INDX_OFFSET_0_VIS_CULL(vis_cull_mode);
}
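
Usage sketch: the first dword of an indexed triangle-list draw with 16-bit indices, leaving VIS_CULL as 0 so it can be patched later (enum names taken from the generated register headers):

uint32_t dword0 = DRAW4(DI_PT_TRILIST, DI_SRC_SEL_DMA,
                        INDEX4_SIZE_16_BIT, 0);
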
static inline void
fd4_draw(struct fd_batch *batch, struct fd_ringbuffer *ring,
- enum pc_di_primtype primtype,
- enum pc_di_vis_cull_mode vismode,
- enum pc_di_src_sel src_sel, uint32_t count,
- uint32_t instances, enum a4xx_index_size idx_type,
- uint32_t max_indices, uint32_t idx_offset,
- struct pipe_resource *idx_buffer)
+ enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
+ enum pc_di_src_sel src_sel, uint32_t count, uint32_t instances,
+ enum a4xx_index_size idx_type, uint32_t max_indices,
+ uint32_t idx_offset, struct pipe_resource *idx_buffer)
{
- /* for debug after a lock up, write a unique counter value
- * to scratch7 for each draw, to make it easier to match up
- * register dumps to cmdstream. The combination of IB
- * (scratch6) and DRAW is enough to "triangulate" the
- * particular draw that caused lockup.
- */
- emit_marker(ring, 7);
-
- OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 6 : 3);
- if (vismode == USE_VISIBILITY) {
- /* leave vis mode blank for now, it will be patched up when
- * we know if we are binning or not
- */
- OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
- &batch->draw_patches);
- } else {
- OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
- }
- OUT_RING(ring, instances); /* NumInstances */
- OUT_RING(ring, count); /* NumIndices */
- if (idx_buffer) {
- OUT_RING(ring, 0x0); /* XXX */
- OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
- OUT_RING (ring, max_indices);
- }
-
- emit_marker(ring, 7);
-
- fd_reset_wfi(batch);
+ /* for debug after a lock up, write a unique counter value
+ * to scratch7 for each draw, to make it easier to match up
+ * register dumps to cmdstream. The combination of IB
+ * (scratch6) and DRAW is enough to "triangulate" the
+ * particular draw that caused lockup.
+ */
+ emit_marker(ring, 7);
+
+ OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 6 : 3);
+ if (vismode == USE_VISIBILITY) {
+ /* leave vis mode blank for now, it will be patched up when
+ * we know if we are binning or not
+ */
+ OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
+ &batch->draw_patches);
+ } else {
+ OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
+ }
+ OUT_RING(ring, instances); /* NumInstances */
+ OUT_RING(ring, count); /* NumIndices */
+ if (idx_buffer) {
+ OUT_RING(ring, 0x0); /* XXX */
+ OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
+ OUT_RING(ring, max_indices);
+ }
+
+ emit_marker(ring, 7);
+
+ fd_reset_wfi(batch);
}
static inline void
fd4_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
- enum pc_di_primtype primtype,
- enum pc_di_vis_cull_mode vismode,
- const struct pipe_draw_info *info,
+ enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
+ const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count *draw,
- unsigned index_offset)
+ const struct pipe_draw_start_count *draw, unsigned index_offset)
{
- struct pipe_resource *idx_buffer = NULL;
- enum a4xx_index_size idx_type;
- enum pc_di_src_sel src_sel;
- uint32_t idx_size, idx_offset;
-
- if (indirect && indirect->buffer) {
- struct fd_resource *ind = fd_resource(indirect->buffer);
-
- emit_marker(ring, 7);
-
- if (info->index_size) {
- struct pipe_resource *idx = info->index.resource;
-
- OUT_PKT3(ring, CP_DRAW_INDX_INDIRECT, 4);
- OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_DMA,
- fd4_size2indextype(info->index_size), 0),
- &batch->draw_patches);
- OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
- OUT_RING(ring, A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE(
- idx->width0 - index_offset));
- OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
- } else {
- OUT_PKT3(ring, CP_DRAW_INDIRECT, 2);
- OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0),
- &batch->draw_patches);
- OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
- }
-
- emit_marker(ring, 7);
- fd_reset_wfi(batch);
-
- return;
- }
-
- if (info->index_size) {
- assert(!info->has_user_indices);
-
- idx_buffer = info->index.resource;
- idx_type = fd4_size2indextype(info->index_size);
- idx_size = info->index_size * draw->count;
- idx_offset = index_offset + draw->start * info->index_size;
- src_sel = DI_SRC_SEL_DMA;
- } else {
- idx_buffer = NULL;
- idx_type = INDEX4_SIZE_32_BIT;
- idx_size = 0;
- idx_offset = 0;
- src_sel = DI_SRC_SEL_AUTO_INDEX;
- }
-
- fd4_draw(batch, ring, primtype, vismode, src_sel,
- draw->count, info->instance_count,
- idx_type, idx_size, idx_offset, idx_buffer);
+ struct pipe_resource *idx_buffer = NULL;
+ enum a4xx_index_size idx_type;
+ enum pc_di_src_sel src_sel;
+ uint32_t idx_size, idx_offset;
+
+ if (indirect && indirect->buffer) {
+ struct fd_resource *ind = fd_resource(indirect->buffer);
+
+ emit_marker(ring, 7);
+
+ if (info->index_size) {
+ struct pipe_resource *idx = info->index.resource;
+
+ OUT_PKT3(ring, CP_DRAW_INDX_INDIRECT, 4);
+ OUT_RINGP(ring,
+ DRAW4(primtype, DI_SRC_SEL_DMA,
+ fd4_size2indextype(info->index_size), 0),
+ &batch->draw_patches);
+ OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
+ OUT_RING(ring, A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE(idx->width0 -
+ index_offset));
+ OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
+ } else {
+ OUT_PKT3(ring, CP_DRAW_INDIRECT, 2);
+ OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0),
+ &batch->draw_patches);
+ OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
+ }
+
+ emit_marker(ring, 7);
+ fd_reset_wfi(batch);
+
+ return;
+ }
+
+ if (info->index_size) {
+ assert(!info->has_user_indices);
+
+ idx_buffer = info->index.resource;
+ idx_type = fd4_size2indextype(info->index_size);
+ idx_size = info->index_size * draw->count;
+ idx_offset = index_offset + draw->start * info->index_size;
+ src_sel = DI_SRC_SEL_DMA;
+ } else {
+ idx_buffer = NULL;
+ idx_type = INDEX4_SIZE_32_BIT;
+ idx_size = 0;
+ idx_offset = 0;
+ src_sel = DI_SRC_SEL_AUTO_INDEX;
+ }
+
+ fd4_draw(batch, ring, primtype, vismode, src_sel, draw->count,
+ info->instance_count, idx_type, idx_size, idx_offset, idx_buffer);
}
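
The direct-path byte arithmetic above, as a tiny standalone sketch with a worked example:

#include <stdint.h>

/* E.g. index_size = 2, start = 10, count = 90, index_offset = 0:
 * the draw reads 180 bytes starting at byte 20 of the index buffer.
 */
static void
index_window(uint32_t index_size, uint32_t start, uint32_t count,
             uint32_t index_offset, uint32_t *offset_out, uint32_t *size_out)
{
   *offset_out = index_offset + start * index_size;
   *size_out = count * index_size;
}
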
#endif /* FD4_DRAW_H_ */
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_helpers.h"
#include "util/format/u_format.h"
+#include "util/u_helpers.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "util/u_viewport.h"
-#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
+#include "freedreno_resource.h"
-#include "fd4_emit.h"
#include "fd4_blend.h"
#include "fd4_context.h"
+#include "fd4_emit.h"
+#include "fd4_format.h"
#include "fd4_program.h"
#include "fd4_rasterizer.h"
#include "fd4_texture.h"
-#include "fd4_format.h"
#include "fd4_zsa.h"
#define emit_const_user fd4_emit_const_user
-#define emit_const_bo fd4_emit_const_bo
+#define emit_const_bo fd4_emit_const_bo
#include "ir3_const.h"
/* regid: base const register
*/
static void
fd4_emit_const_user(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t regid, uint32_t sizedwords,
- const uint32_t *dwords)
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t sizedwords, const uint32_t *dwords)
{
- emit_const_asserts(ring, v, regid, sizedwords);
-
- OUT_PKT3(ring, CP_LOAD_STATE4, 2 + sizedwords);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
- CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4));
- OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
- for (int i = 0; i < sizedwords; i++)
- OUT_RING(ring, dwords[i]);
+ emit_const_asserts(ring, v, regid, sizedwords);
+
+ OUT_PKT3(ring, CP_LOAD_STATE4, 2 + sizedwords);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid / 4) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
+ CP_LOAD_STATE4_0_NUM_UNIT(sizedwords / 4));
+ OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
+ for (int i = 0; i < sizedwords; i++)
+ OUT_RING(ring, dwords[i]);
}
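
CP_LOAD_STATE4 addresses constants in vec4 units, hence the /4 on both regid and sizedwords above; a sketch with a worked example:

#include <stdint.h>

/* 8 dwords of consts at regid 16 load as DST_OFF = 4, NUM_UNIT = 2 */
static void
load_state4_units(uint32_t regid, uint32_t sizedwords,
                  uint32_t *dst_off, uint32_t *num_unit)
{
   *dst_off = regid / 4;
   *num_unit = sizedwords / 4;
}
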
static void
-fd4_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
- uint32_t regid, uint32_t offset, uint32_t sizedwords,
- struct fd_bo *bo)
+fd4_emit_const_bo(struct fd_ringbuffer *ring,
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
- uint32_t dst_off = regid / 4;
- assert(dst_off % 4 == 0);
- uint32_t num_unit = sizedwords / 4;
- assert(num_unit % 4 == 0);
-
- emit_const_asserts(ring, v, regid, sizedwords);
-
- OUT_PKT3(ring, CP_LOAD_STATE4, 2);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
- CP_LOAD_STATE4_0_NUM_UNIT(num_unit));
- OUT_RELOC(ring, bo, offset,
- CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0);
+ uint32_t dst_off = regid / 4;
+ assert(dst_off % 4 == 0);
+ uint32_t num_unit = sizedwords / 4;
+ assert(num_unit % 4 == 0);
+
+ emit_const_asserts(ring, v, regid, sizedwords);
+
+ OUT_PKT3(ring, CP_LOAD_STATE4, 2);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
+ CP_LOAD_STATE4_0_NUM_UNIT(num_unit));
+ OUT_RELOC(ring, bo, offset, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0);
}
static void
fd4_emit_const_ptrs(struct fd_ringbuffer *ring, gl_shader_stage type,
- uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
+ uint32_t regid, uint32_t num, struct fd_bo **bos,
+ uint32_t *offsets)
{
- uint32_t anum = align(num, 4);
- uint32_t i;
-
- debug_assert((regid % 4) == 0);
-
- OUT_PKT3(ring, CP_LOAD_STATE4, 2 + anum);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) |
- CP_LOAD_STATE4_0_NUM_UNIT(anum/4));
- OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
-
- for (i = 0; i < num; i++) {
- if (bos[i]) {
- OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
- } else {
- OUT_RING(ring, 0xbad00000 | (i << 16));
- }
- }
-
- for (; i < anum; i++)
- OUT_RING(ring, 0xffffffff);
+ uint32_t anum = align(num, 4);
+ uint32_t i;
+
+ debug_assert((regid % 4) == 0);
+
+ OUT_PKT3(ring, CP_LOAD_STATE4, 2 + anum);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid / 4) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) |
+ CP_LOAD_STATE4_0_NUM_UNIT(anum / 4));
+ OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
+
+ for (i = 0; i < num; i++) {
+ if (bos[i]) {
+ OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+ } else {
+ OUT_RING(ring, 0xbad00000 | (i << 16));
+ }
+ }
+
+ for (; i < anum; i++)
+ OUT_RING(ring, 0xffffffff);
}
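
The poison value written for an unbound UBO above encodes the slot index, which makes GPU faults attributable from a register dump; a sketch (the encoding stays readable for slots below 16):

#include <stdint.h>

/* slot 3 -> 0xbad30000: a fault in that range names the UBO slot */
static uint32_t
ubo_poison(uint32_t slot)
{
   return 0xbad00000u | (slot << 16);
}
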
static bool
is_stateobj(struct fd_ringbuffer *ring)
{
- return false;
+ return false;
}
static void
-emit_const_ptrs(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t dst_offset,
- uint32_t num, struct fd_bo **bos, uint32_t *offsets)
+emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
+ uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
+ uint32_t *offsets)
{
- /* TODO inline this */
- assert(dst_offset + num <= v->constlen * 4);
- fd4_emit_const_ptrs(ring, v->type, dst_offset, num, bos, offsets);
+ /* TODO inline this */
+ assert(dst_offset + num <= v->constlen * 4);
+ fd4_emit_const_ptrs(ring, v->type, dst_offset, num, bos, offsets);
}
static void
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum a4xx_state_block sb, struct fd_texture_stateobj *tex,
- const struct ir3_shader_variant *v)
+ enum a4xx_state_block sb, struct fd_texture_stateobj *tex,
+ const struct ir3_shader_variant *v)
{
- static const uint32_t bcolor_reg[] = {
- [SB4_VS_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
- [SB4_FS_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
- };
- struct fd4_context *fd4_ctx = fd4_context(ctx);
- bool needs_border = false;
- unsigned i;
-
- if (tex->num_samplers > 0) {
- int num_samplers;
-
- /* not sure if this is an a420.0 workaround, but we seem
- * to need to emit these in pairs.. emit a final dummy
- * entry if odd # of samplers:
- */
- num_samplers = align(tex->num_samplers, 2);
-
- /* output sampler state: */
- OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * num_samplers));
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE4_0_NUM_UNIT(num_samplers));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- for (i = 0; i < tex->num_samplers; i++) {
- static const struct fd4_sampler_stateobj dummy_sampler = {};
- const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ?
- fd4_sampler_stateobj(tex->samplers[i]) :
- &dummy_sampler;
- OUT_RING(ring, sampler->texsamp0);
- OUT_RING(ring, sampler->texsamp1);
-
- needs_border |= sampler->needs_border;
- }
-
- for (; i < num_samplers; i++) {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
- }
-
- if (tex->num_textures > 0) {
- unsigned num_textures = tex->num_textures + v->astc_srgb.count;
-
- /* emit texture state: */
- OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * num_textures));
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE4_0_NUM_UNIT(num_textures));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- for (i = 0; i < tex->num_textures; i++) {
- static const struct fd4_pipe_sampler_view dummy_view = {};
- const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
- fd4_pipe_sampler_view(tex->textures[i]) :
- &dummy_view;
-
- OUT_RING(ring, view->texconst0);
- OUT_RING(ring, view->texconst1);
- OUT_RING(ring, view->texconst2);
- OUT_RING(ring, view->texconst3);
- if (view->base.texture) {
- struct fd_resource *rsc = fd_resource(view->base.texture);
- if (view->base.format == PIPE_FORMAT_X32_S8X24_UINT)
- rsc = rsc->stencil;
- OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
- } else {
- OUT_RING(ring, 0x00000000);
- }
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
-
- for (i = 0; i < v->astc_srgb.count; i++) {
- static const struct fd4_pipe_sampler_view dummy_view = {};
- const struct fd4_pipe_sampler_view *view;
- unsigned idx = v->astc_srgb.orig_idx[i];
-
- view = tex->textures[idx] ?
- fd4_pipe_sampler_view(tex->textures[idx]) :
- &dummy_view;
-
- debug_assert(view->texconst0 & A4XX_TEX_CONST_0_SRGB);
-
- OUT_RING(ring, view->texconst0 & ~A4XX_TEX_CONST_0_SRGB);
- OUT_RING(ring, view->texconst1);
- OUT_RING(ring, view->texconst2);
- OUT_RING(ring, view->texconst3);
- if (view->base.texture) {
- struct fd_resource *rsc = fd_resource(view->base.texture);
- OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
- } else {
- OUT_RING(ring, 0x00000000);
- }
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
- } else {
- debug_assert(v->astc_srgb.count == 0);
- }
-
- if (needs_border) {
- unsigned off;
- void *ptr;
-
- u_upload_alloc(fd4_ctx->border_color_uploader,
- 0, BORDER_COLOR_UPLOAD_SIZE,
- BORDER_COLOR_UPLOAD_SIZE, &off,
- &fd4_ctx->border_color_buf,
- &ptr);
-
- fd_setup_border_colors(tex, ptr, 0);
- OUT_PKT0(ring, bcolor_reg[sb], 1);
- OUT_RELOC(ring, fd_resource(fd4_ctx->border_color_buf)->bo, off, 0, 0);
-
- u_upload_unmap(fd4_ctx->border_color_uploader);
- }
+ static const uint32_t bcolor_reg[] = {
+ [SB4_VS_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
+ [SB4_FS_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
+ };
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ bool needs_border = false;
+ unsigned i;
+
+ if (tex->num_samplers > 0) {
+ int num_samplers;
+
+ /* not sure if this is an a420.0 workaround, but we seem
+ * to need to emit these in pairs.. emit a final dummy
+ * entry if odd # of samplers:
+ */
+ num_samplers = align(tex->num_samplers, 2);
+
+ /* output sampler state: */
+ OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * num_samplers));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(num_samplers));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < tex->num_samplers; i++) {
+ static const struct fd4_sampler_stateobj dummy_sampler = {};
+ const struct fd4_sampler_stateobj *sampler =
+ tex->samplers[i] ? fd4_sampler_stateobj(tex->samplers[i])
+ : &dummy_sampler;
+ OUT_RING(ring, sampler->texsamp0);
+ OUT_RING(ring, sampler->texsamp1);
+
+ needs_border |= sampler->needs_border;
+ }
+
+ for (; i < num_samplers; i++) {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ }
+
+ if (tex->num_textures > 0) {
+ unsigned num_textures = tex->num_textures + v->astc_srgb.count;
+
+ /* emit texture state: */
+ OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * num_textures));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(num_textures));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < tex->num_textures; i++) {
+ static const struct fd4_pipe_sampler_view dummy_view = {};
+ const struct fd4_pipe_sampler_view *view =
+ tex->textures[i] ? fd4_pipe_sampler_view(tex->textures[i])
+ : &dummy_view;
+
+ OUT_RING(ring, view->texconst0);
+ OUT_RING(ring, view->texconst1);
+ OUT_RING(ring, view->texconst2);
+ OUT_RING(ring, view->texconst3);
+ if (view->base.texture) {
+ struct fd_resource *rsc = fd_resource(view->base.texture);
+ if (view->base.format == PIPE_FORMAT_X32_S8X24_UINT)
+ rsc = rsc->stencil;
+ OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ }
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+
+ for (i = 0; i < v->astc_srgb.count; i++) {
+ static const struct fd4_pipe_sampler_view dummy_view = {};
+ const struct fd4_pipe_sampler_view *view;
+ unsigned idx = v->astc_srgb.orig_idx[i];
+
+ view = tex->textures[idx] ? fd4_pipe_sampler_view(tex->textures[idx])
+ : &dummy_view;
+
+ debug_assert(view->texconst0 & A4XX_TEX_CONST_0_SRGB);
+
+ OUT_RING(ring, view->texconst0 & ~A4XX_TEX_CONST_0_SRGB);
+ OUT_RING(ring, view->texconst1);
+ OUT_RING(ring, view->texconst2);
+ OUT_RING(ring, view->texconst3);
+ if (view->base.texture) {
+ struct fd_resource *rsc = fd_resource(view->base.texture);
+ OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ }
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ } else {
+ debug_assert(v->astc_srgb.count == 0);
+ }
+
+ if (needs_border) {
+ unsigned off;
+ void *ptr;
+
+ u_upload_alloc(fd4_ctx->border_color_uploader, 0,
+ BORDER_COLOR_UPLOAD_SIZE, BORDER_COLOR_UPLOAD_SIZE, &off,
+ &fd4_ctx->border_color_buf, &ptr);
+
+ fd_setup_border_colors(tex, ptr, 0);
+ OUT_PKT0(ring, bcolor_reg[sb], 1);
+ OUT_RELOC(ring, fd_resource(fd4_ctx->border_color_buf)->bo, off, 0, 0);
+
+ u_upload_unmap(fd4_ctx->border_color_uploader);
+ }
}
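
The pair-padding above relies on align() rounding up to the next multiple; Mesa's align() for power-of-two alignments is essentially:

#include <stdint.h>

/* align(3, 2) == 4, which is what pair-pads an odd sampler count above */
static inline uint32_t
align_pot(uint32_t v, uint32_t a)
{
   return (v + a - 1) & ~(a - 1);
}
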
/* emit texture state for mem->gmem restore operation.. eventually it would
 * be good to get rid of this and use normal CSO/etc state for more of
 * these special cases..
 */
void
fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
- struct pipe_surface **bufs)
+ struct pipe_surface **bufs)
{
- unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS];
- int i;
-
- for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
- mrt_comp[i] = (i < nr_bufs) ? 0xf : 0;
- }
-
- /* output sampler state: */
- OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * nr_bufs));
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(SB4_FS_TEX) |
- CP_LOAD_STATE4_0_NUM_UNIT(nr_bufs));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- for (i = 0; i < nr_bufs; i++) {
- OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
- A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
- A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
- A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
- A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
- OUT_RING(ring, 0x00000000);
- }
-
- /* emit texture state: */
- OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * nr_bufs));
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(SB4_FS_TEX) |
- CP_LOAD_STATE4_0_NUM_UNIT(nr_bufs));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- for (i = 0; i < nr_bufs; i++) {
- if (bufs[i]) {
- struct fd_resource *rsc = fd_resource(bufs[i]->texture);
- enum pipe_format format = fd_gmem_restore_format(bufs[i]->format);
-
- /* The restore blit_zs shader expects stencil in sampler 0,
- * and depth in sampler 1
- */
- if (rsc->stencil && (i == 0)) {
- rsc = rsc->stencil;
- format = fd_gmem_restore_format(rsc->b.b.format);
- }
-
- /* note: PIPE_BUFFER disallowed for surfaces */
- unsigned lvl = bufs[i]->u.tex.level;
- unsigned offset = fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer);
-
- /* z32 restore is accomplished using depth write. If there is
- * no stencil component (ie. PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
- * then no render target:
- *
- * (The same applies for z32_s8x24, since for stencil sampler
- * state the above 'if' will replace 'format' with s8)
- */
- if ((format == PIPE_FORMAT_Z32_FLOAT) ||
- (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT))
- mrt_comp[i] = 0;
-
- debug_assert(bufs[i]->u.tex.first_layer == bufs[i]->u.tex.last_layer);
-
- OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
- A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
- fd4_tex_swiz(format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
- PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
- OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
- A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
- OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)));
- OUT_RING(ring, 0x00000000);
- OUT_RELOC(ring, rsc->bo, offset, 0, 0);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- } else {
- OUT_RING(ring, A4XX_TEX_CONST_0_FMT(0) |
- A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
- A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_ONE) |
- A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_ONE) |
- A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_ONE) |
- A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_ONE));
- OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(0) |
- A4XX_TEX_CONST_1_HEIGHT(0));
- OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(0));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
- }
-
- OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
- OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
- A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
- A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
- A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
- A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
- A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
- A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
- A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+ unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS];
+ int i;
+
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = (i < nr_bufs) ? 0xf : 0;
+ }
+
+ /* output sampler state: */
+ OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * nr_bufs));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(SB4_FS_TEX) |
+ CP_LOAD_STATE4_0_NUM_UNIT(nr_bufs));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < nr_bufs; i++) {
+ OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
+ A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
+ A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
+ A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
+ A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
+ OUT_RING(ring, 0x00000000);
+ }
+
+ /* emit texture state: */
+ OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * nr_bufs));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(SB4_FS_TEX) |
+ CP_LOAD_STATE4_0_NUM_UNIT(nr_bufs));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < nr_bufs; i++) {
+ if (bufs[i]) {
+ struct fd_resource *rsc = fd_resource(bufs[i]->texture);
+ enum pipe_format format = fd_gmem_restore_format(bufs[i]->format);
+
+ /* The restore blit_zs shader expects stencil in sampler 0,
+ * and depth in sampler 1
+ */
+ if (rsc->stencil && (i == 0)) {
+ rsc = rsc->stencil;
+ format = fd_gmem_restore_format(rsc->b.b.format);
+ }
+
+ /* note: PIPE_BUFFER disallowed for surfaces */
+ unsigned lvl = bufs[i]->u.tex.level;
+ unsigned offset =
+ fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer);
+
+ /* z32 restore is accomplished using depth write. If there is
+ * no stencil component (ie. PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ * then no render target:
+ *
+ * (The same applies for z32_s8x24, since for stencil sampler
+ * state the above 'if' will replace 'format' with s8)
+ */
+ if ((format == PIPE_FORMAT_Z32_FLOAT) ||
+ (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT))
+ mrt_comp[i] = 0;
+
+ debug_assert(bufs[i]->u.tex.first_layer == bufs[i]->u.tex.last_layer);
+
+ OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
+ A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
+ fd4_tex_swiz(format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
+ OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
+ A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
+ OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)));
+ OUT_RING(ring, 0x00000000);
+ OUT_RELOC(ring, rsc->bo, offset, 0, 0);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ } else {
+ OUT_RING(ring, A4XX_TEX_CONST_0_FMT(0) |
+ A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
+ A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_ONE) |
+ A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_ONE) |
+ A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_ONE) |
+ A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_ONE));
+ OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(0) | A4XX_TEX_CONST_1_HEIGHT(0));
+ OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(0));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ }
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
}
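
For reference, RB_RENDER_COMPONENTS packs a 4-bit RGBA write mask per MRT (assuming the per-RT field layout implied by the RTn macros above); a sketch of the packing with a worked example:

#include <stdint.h>

/* e.g. two restore bufs where buf 0 turned out to be Z32F (its
 * mrt_comp[] cleared above): {0x0, 0xf, 0, ...} packs to 0x000000f0 */
static uint32_t
pack_render_components(const uint8_t comp[8])
{
   uint32_t v = 0;
   for (unsigned i = 0; i < 8; i++)
      v |= (uint32_t)(comp[i] & 0xf) << (i * 4);
   return v;
}
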
void
fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
{
- int32_t i, j, last = -1;
- uint32_t total_in = 0;
- const struct fd_vertex_state *vtx = emit->vtx;
- const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
- unsigned vertex_regid = regid(63, 0);
- unsigned instance_regid = regid(63, 0);
- unsigned vtxcnt_regid = regid(63, 0);
-
- /* Note that sysvals come *after* normal inputs: */
- for (i = 0; i < vp->inputs_count; i++) {
- if (!vp->inputs[i].compmask)
- continue;
- if (vp->inputs[i].sysval) {
- switch(vp->inputs[i].slot) {
- case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
- vertex_regid = vp->inputs[i].regid;
- break;
- case SYSTEM_VALUE_INSTANCE_ID:
- instance_regid = vp->inputs[i].regid;
- break;
- case SYSTEM_VALUE_VERTEX_CNT:
- vtxcnt_regid = vp->inputs[i].regid;
- break;
- default:
- unreachable("invalid system value");
- break;
- }
- } else if (i < vtx->vtx->num_elements) {
- last = i;
- }
- }
-
- for (i = 0, j = 0; i <= last; i++) {
- assert(!vp->inputs[i].sysval);
- if (vp->inputs[i].compmask) {
- struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
- const struct pipe_vertex_buffer *vb =
- &vtx->vertexbuf.vb[elem->vertex_buffer_index];
- struct fd_resource *rsc = fd_resource(vb->buffer.resource);
- enum pipe_format pfmt = elem->src_format;
- enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
- bool switchnext = (i != last) ||
- (vertex_regid != regid(63, 0)) ||
- (instance_regid != regid(63, 0)) ||
- (vtxcnt_regid != regid(63, 0));
- bool isint = util_format_is_pure_integer(pfmt);
- uint32_t fs = util_format_get_blocksize(pfmt);
- uint32_t off = vb->buffer_offset + elem->src_offset;
- uint32_t size = fd_bo_size(rsc->bo) - off;
- debug_assert(fmt != VFMT4_NONE);
+ int32_t i, j, last = -1;
+ uint32_t total_in = 0;
+ const struct fd_vertex_state *vtx = emit->vtx;
+ const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
+ unsigned vertex_regid = regid(63, 0);
+ unsigned instance_regid = regid(63, 0);
+ unsigned vtxcnt_regid = regid(63, 0);
+
+ /* Note that sysvals come *after* normal inputs: */
+ for (i = 0; i < vp->inputs_count; i++) {
+ if (!vp->inputs[i].compmask)
+ continue;
+ if (vp->inputs[i].sysval) {
+ switch (vp->inputs[i].slot) {
+ case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+ vertex_regid = vp->inputs[i].regid;
+ break;
+ case SYSTEM_VALUE_INSTANCE_ID:
+ instance_regid = vp->inputs[i].regid;
+ break;
+ case SYSTEM_VALUE_VERTEX_CNT:
+ vtxcnt_regid = vp->inputs[i].regid;
+ break;
+ default:
+ unreachable("invalid system value");
+ break;
+ }
+ } else if (i < vtx->vtx->num_elements) {
+ last = i;
+ }
+ }
+
+ for (i = 0, j = 0; i <= last; i++) {
+ assert(!vp->inputs[i].sysval);
+ if (vp->inputs[i].compmask) {
+ struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
+ const struct pipe_vertex_buffer *vb =
+ &vtx->vertexbuf.vb[elem->vertex_buffer_index];
+ struct fd_resource *rsc = fd_resource(vb->buffer.resource);
+ enum pipe_format pfmt = elem->src_format;
+ enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
+ bool switchnext = (i != last) || (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
+ bool isint = util_format_is_pure_integer(pfmt);
+ uint32_t fs = util_format_get_blocksize(pfmt);
+ uint32_t off = vb->buffer_offset + elem->src_offset;
+ uint32_t size = fd_bo_size(rsc->bo) - off;
+ debug_assert(fmt != VFMT4_NONE);
#ifdef DEBUG
- /* see dEQP-GLES31.stress.vertex_attribute_binding.buffer_bounds.bind_vertex_buffer_offset_near_wrap_10
- */
- if (off > fd_bo_size(rsc->bo))
- continue;
+ /* see
+ * dEQP-GLES31.stress.vertex_attribute_binding.buffer_bounds.bind_vertex_buffer_offset_near_wrap_10
+ */
+ if (off > fd_bo_size(rsc->bo))
+ continue;
#endif
- OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
- OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
- A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
- COND(elem->instance_divisor, A4XX_VFD_FETCH_INSTR_0_INSTANCED) |
- COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
- OUT_RELOC(ring, rsc->bo, off, 0, 0);
- OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
- OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(MAX2(1, elem->instance_divisor)));
-
- OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(j), 1);
- OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
- A4XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
- A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
- A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
- A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
- A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
- A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
- COND(isint, A4XX_VFD_DECODE_INSTR_INT) |
- COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
-
- total_in += util_bitcount(vp->inputs[i].compmask);
- j++;
- }
- }
-
- /* hw doesn't like to be configured for zero vbo's, it seems: */
- if (last < 0) {
- /* just recycle the shader bo, we just need to point to *something*
- * valid:
- */
- struct fd_bo *dummy_vbo = vp->bo;
- bool switchnext = (vertex_regid != regid(63, 0)) ||
- (instance_regid != regid(63, 0)) ||
- (vtxcnt_regid != regid(63, 0));
-
- OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4);
- OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
- A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
- COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
- OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
- OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1));
- OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1));
-
- OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1);
- OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
- A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
- A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) |
- A4XX_VFD_DECODE_INSTR_SWAP(XYZW) |
- A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
- A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
- A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
- COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
-
- total_in = 1;
- j = 1;
- }
-
- OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
- OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
- 0xa0000 | /* XXX */
- A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
- A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
- OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
- A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
- A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
- OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_2 */
- OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(vtxcnt_regid));
- OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_4 */
-
- /* cache invalidate, otherwise vertex fetch could see
- * stale vbo contents:
- */
- OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000012);
+ OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
+ A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
+ COND(elem->instance_divisor,
+ A4XX_VFD_FETCH_INSTR_0_INSTANCED) |
+ COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
+ OUT_RELOC(ring, rsc->bo, off, 0, 0);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(
+ MAX2(1, elem->instance_divisor)));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(j), 1);
+ OUT_RING(ring,
+ A4XX_VFD_DECODE_INSTR_CONSTFILL |
+ A4XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
+ A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+ A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
+ A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
+ A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
+ A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(isint, A4XX_VFD_DECODE_INSTR_INT) |
+ COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in += util_bitcount(vp->inputs[i].compmask);
+ j++;
+ }
+ }
+
+ /* hw doesn't like to be configured for zero vbo's, it seems: */
+ if (last < 0) {
+ /* just recycle the shader bo, we just need to point to *something*
+ * valid:
+ */
+ struct fd_bo *dummy_vbo = vp->bo;
+ bool switchnext = (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+ A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+ COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
+ OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1));
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1);
+ OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
+ A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+ A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) |
+ A4XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+ A4XX_VFD_DECODE_INSTR_REGID(regid(0, 0)) |
+ A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+ A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in = 1;
+ j = 1;
+ }
+
+ OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
+ OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
+ 0xa0000 | /* XXX */
+ A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
+ A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
+ OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
+ A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
+ A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
+ OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_2 */
+ OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(vtxcnt_regid));
+ OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_4 */
+
+ /* cache invalidate, otherwise vertex fetch could see
+ * stale vbo contents:
+ */
+ OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000012);
}
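/* A note on the regid(63, 0) sentinel tested above: ir3 packs a
 * register index and component into one byte, and r63.x means "no
 * register assigned".  A sketch of the assumed packing (see the ir3
 * headers for the authoritative definition):
 */
static inline unsigned
regid(int num, int comp)
{
   return ((num & 0x3f) << 2) | (comp & 0x3); /* regid(63, 0) == 0xfc */
}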
void
fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd4_emit *emit)
+ struct fd4_emit *emit)
{
- const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
- const struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
- const enum fd_dirty_3d_state dirty = emit->dirty;
-
- emit_marker(ring, 5);
-
- if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->binning_pass) {
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
-
- for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
- mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
- }
-
- OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
- OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
- A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
- A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
- A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
- A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
- A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
- A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
- A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
- }
-
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
- struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- uint32_t rb_alpha_control = zsa->rb_alpha_control;
-
- if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
- rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST;
-
- OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
- OUT_RING(ring, rb_alpha_control);
-
- OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
- OUT_RING(ring, zsa->rb_stencil_control);
- OUT_RING(ring, zsa->rb_stencil_control2);
- }
-
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
- struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
- struct pipe_stencil_ref *sr = &ctx->stencil_ref;
-
- OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
- OUT_RING(ring, zsa->rb_stencilrefmask |
- A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
- OUT_RING(ring, zsa->rb_stencilrefmask_bf |
- A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
- }
-
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
- struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
- bool fragz = fp->no_earlyz | fp->has_kill | fp->writes_pos;
- bool clamp = !ctx->rasterizer->depth_clip_near;
-
- OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
- OUT_RING(ring, zsa->rb_depth_control |
- COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) |
- COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
- COND(fragz && fp->fragcoord_compmask != 0,
- A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));
-
- /* maybe this register/bitfield needs a better name.. this
- * appears to be just disabling early-z
- */
- OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
- OUT_RING(ring, zsa->gras_alpha_control |
- COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE) |
- COND(fragz && fp->fragcoord_compmask != 0,
- A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS));
- }
-
- if (dirty & FD_DIRTY_RASTERIZER) {
- struct fd4_rasterizer_stateobj *rasterizer =
- fd4_rasterizer_stateobj(ctx->rasterizer);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
- OUT_RING(ring, rasterizer->gras_su_mode_control |
- A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
- OUT_RING(ring, rasterizer->gras_su_point_minmax);
- OUT_RING(ring, rasterizer->gras_su_point_size);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
- OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
- OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
- OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
- OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
- }
-
- /* NOTE: since primitive_restart is not actually part of any
- * state object, we need to make sure that we always emit
- * PRIM_VTX_CNTL.. either that or be more clever and detect
- * when it changes.
- */
- if (emit->info) {
- const struct pipe_draw_info *info = emit->info;
- struct fd4_rasterizer_stateobj *rast =
- fd4_rasterizer_stateobj(ctx->rasterizer);
- uint32_t val = rast->pc_prim_vtx_cntl;
-
- if (info->index_size && info->primitive_restart)
- val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
-
- val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);
-
- if (fp->total_in > 0) {
- uint32_t varout = align(fp->total_in, 16) / 16;
- if (varout > 1)
- varout = align(varout, 2);
- val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout);
- }
-
- OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
- OUT_RING(ring, val);
- OUT_RING(ring, rast->pc_prim_vtx_cntl2);
- }
-
- /* NOTE: scissor enabled bit is part of rasterizer state: */
- if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
- struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
- OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
- A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
- OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
- A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
-
- ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
- ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
- ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
- ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
- }
-
- if (dirty & FD_DIRTY_VIEWPORT) {
- fd_wfi(ctx->batch, ring);
- OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
- }
-
- if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
- float zmin, zmax;
- int depth = 24;
- if (ctx->batch->framebuffer.zsbuf) {
- depth = util_format_get_component_bits(
- pipe_surface_format(ctx->batch->framebuffer.zsbuf),
- UTIL_FORMAT_COLORSPACE_ZS, 0);
- }
- util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
- &zmin, &zmax);
-
- OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2);
- if (depth == 32) {
- OUT_RING(ring, fui(zmin));
- OUT_RING(ring, fui(zmax));
- } else if (depth == 16) {
- OUT_RING(ring, (uint32_t)(zmin * 0xffff));
- OUT_RING(ring, (uint32_t)(zmax * 0xffff));
- } else {
- OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
- OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
- }
- }
-
- if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- unsigned n = pfb->nr_cbufs;
- /* if we have depth/stencil, we need at least one MRT: */
- if (pfb->zsbuf)
- n = MAX2(1, n);
- fd4_program_emit(ring, emit, n, pfb->cbufs);
- }
-
- if (!emit->skip_consts) { /* evil hack to deal sanely with clear path */
- ir3_emit_vs_consts(vp, ring, ctx, emit->info, emit->indirect, emit->draw);
- if (!emit->binning_pass)
- ir3_emit_fs_consts(fp, ring, ctx);
- }
-
- if ((dirty & FD_DIRTY_BLEND)) {
- struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
- uint32_t i;
-
- for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
- enum pipe_format format = pipe_surface_format(
- ctx->batch->framebuffer.cbufs[i]);
- bool is_int = util_format_is_pure_integer(format);
- bool has_alpha = util_format_has_alpha(format);
- uint32_t control = blend->rb_mrt[i].control;
-
- if (is_int) {
- control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
- control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
- }
-
- if (!has_alpha) {
- control &= ~A4XX_RB_MRT_CONTROL_BLEND2;
- }
-
- OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, control);
-
- OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
- OUT_RING(ring, blend->rb_mrt[i].blend_control);
- }
-
- OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
- OUT_RING(ring, blend->rb_fs_output |
- A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
- }
-
- if (dirty & FD_DIRTY_BLEND_COLOR) {
- struct pipe_blend_color *bcolor = &ctx->blend_color;
-
- OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
- OUT_RING(ring, A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) |
- A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 0xff) |
- A4XX_RB_BLEND_RED_SINT(bcolor->color[0] * 0x7f));
- OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
- OUT_RING(ring, A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) |
- A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 0xff) |
- A4XX_RB_BLEND_GREEN_SINT(bcolor->color[1] * 0x7f));
- OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
- OUT_RING(ring, A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) |
- A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 0xff) |
- A4XX_RB_BLEND_BLUE_SINT(bcolor->color[2] * 0x7f));
- OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
- OUT_RING(ring, A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) |
- A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 0xff) |
- A4XX_RB_BLEND_ALPHA_SINT(bcolor->color[3] * 0x7f));
- OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
- }
-
- if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX)
- emit_textures(ctx, ring, SB4_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX], vp);
-
- if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX)
- emit_textures(ctx, ring, SB4_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT], fp);
+ const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
+ const struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
+ const enum fd_dirty_3d_state dirty = emit->dirty;
+
+ emit_marker(ring, 5);
+
+ if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->binning_pass) {
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
+
+ for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
+ }
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
+ struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ uint32_t rb_alpha_control = zsa->rb_alpha_control;
+
+ if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
+ rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST;
+
+ OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
+ OUT_RING(ring, rb_alpha_control);
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
+ OUT_RING(ring, zsa->rb_stencil_control);
+ OUT_RING(ring, zsa->rb_stencil_control2);
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
+ struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
+ struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, zsa->rb_stencilrefmask |
+ A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+ OUT_RING(ring, zsa->rb_stencilrefmask_bf |
+ A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+ struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
+ bool fragz = fp->no_earlyz | fp->has_kill | fp->writes_pos;
+ bool clamp = !ctx->rasterizer->depth_clip_near;
+
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, zsa->rb_depth_control |
+ COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) |
+ COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
+ COND(fragz && fp->fragcoord_compmask != 0,
+ A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));
+
+ /* maybe this register/bitfield needs a better name.. this
+ * appears to be just disabling early-z
+ */
+ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+ OUT_RING(ring, zsa->gras_alpha_control |
+ COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE) |
+ COND(fragz && fp->fragcoord_compmask != 0,
+ A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS));
+ }
+
+ if (dirty & FD_DIRTY_RASTERIZER) {
+ struct fd4_rasterizer_stateobj *rasterizer =
+ fd4_rasterizer_stateobj(ctx->rasterizer);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, rasterizer->gras_su_mode_control |
+ A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
+ OUT_RING(ring, rasterizer->gras_su_point_minmax);
+ OUT_RING(ring, rasterizer->gras_su_point_size);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
+ }
+
+ /* NOTE: since primitive_restart is not actually part of any
+ * state object, we need to make sure that we always emit
+ * PRIM_VTX_CNTL.. either that or be more clever and detect
+ * when it changes.
+ */
+ if (emit->info) {
+ const struct pipe_draw_info *info = emit->info;
+ struct fd4_rasterizer_stateobj *rast =
+ fd4_rasterizer_stateobj(ctx->rasterizer);
+ uint32_t val = rast->pc_prim_vtx_cntl;
+
+ if (info->index_size && info->primitive_restart)
+ val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
+
+ val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);
+
+ if (fp->total_in > 0) {
+ uint32_t varout = align(fp->total_in, 16) / 16;
+ if (varout > 1)
+ varout = align(varout, 2);
+ val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout);
+ }
+
+ OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
+ OUT_RING(ring, val);
+ OUT_RING(ring, rast->pc_prim_vtx_cntl2);
+ }
+
+ /* NOTE: scissor enabled bit is part of rasterizer state: */
+ if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
+ struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
+ OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
+ A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
+ OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
+ A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
+
+ ctx->batch->max_scissor.minx =
+ MIN2(ctx->batch->max_scissor.minx, scissor->minx);
+ ctx->batch->max_scissor.miny =
+ MIN2(ctx->batch->max_scissor.miny, scissor->miny);
+ ctx->batch->max_scissor.maxx =
+ MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
+ ctx->batch->max_scissor.maxy =
+ MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
+ }
+
+ if (dirty & FD_DIRTY_VIEWPORT) {
+ fd_wfi(ctx->batch, ring);
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
+ }
+
+ if (dirty &
+ (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
+ float zmin, zmax;
+ int depth = 24;
+ if (ctx->batch->framebuffer.zsbuf) {
+ depth = util_format_get_component_bits(
+ pipe_surface_format(ctx->batch->framebuffer.zsbuf),
+ UTIL_FORMAT_COLORSPACE_ZS, 0);
+ }
+ util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
+ &zmin, &zmax);
+
+ OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2);
+ if (depth == 32) {
+ OUT_RING(ring, fui(zmin));
+ OUT_RING(ring, fui(zmax));
+ } else if (depth == 16) {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffff));
+ } else {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
+ }
+ }
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ unsigned n = pfb->nr_cbufs;
+ /* if we have depth/stencil, we need at least one MRT: */
+ if (pfb->zsbuf)
+ n = MAX2(1, n);
+ fd4_program_emit(ring, emit, n, pfb->cbufs);
+ }
+
+ if (!emit->skip_consts) { /* evil hack to deal sanely with clear path */
+ ir3_emit_vs_consts(vp, ring, ctx, emit->info, emit->indirect, emit->draw);
+ if (!emit->binning_pass)
+ ir3_emit_fs_consts(fp, ring, ctx);
+ }
+
+ if ((dirty & FD_DIRTY_BLEND)) {
+ struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
+ uint32_t i;
+
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ enum pipe_format format =
+ pipe_surface_format(ctx->batch->framebuffer.cbufs[i]);
+ bool is_int = util_format_is_pure_integer(format);
+ bool has_alpha = util_format_has_alpha(format);
+ uint32_t control = blend->rb_mrt[i].control;
+
+ if (is_int) {
+ control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+ control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
+ }
+
+ if (!has_alpha) {
+ control &= ~A4XX_RB_MRT_CONTROL_BLEND2;
+ }
+
+ OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, control);
+
+ OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
+ OUT_RING(ring, blend->rb_mrt[i].blend_control);
+ }
+
+ OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
+ OUT_RING(ring,
+ blend->rb_fs_output | A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
+ }
+
+ if (dirty & FD_DIRTY_BLEND_COLOR) {
+ struct pipe_blend_color *bcolor = &ctx->blend_color;
+
+ OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
+ OUT_RING(ring, A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) |
+ A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 0xff) |
+ A4XX_RB_BLEND_RED_SINT(bcolor->color[0] * 0x7f));
+ OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
+ OUT_RING(ring, A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) |
+ A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 0xff) |
+ A4XX_RB_BLEND_GREEN_SINT(bcolor->color[1] * 0x7f));
+ OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
+ OUT_RING(ring, A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) |
+ A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 0xff) |
+ A4XX_RB_BLEND_BLUE_SINT(bcolor->color[2] * 0x7f));
+ OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
+ OUT_RING(ring, A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) |
+ A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 0xff) |
+ A4XX_RB_BLEND_ALPHA_SINT(bcolor->color[3] * 0x7f));
+ OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
+ }
+
+ if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX)
+ emit_textures(ctx, ring, SB4_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX], vp);
+
+ if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX)
+ emit_textures(ctx, ring, SB4_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT], fp);
}
/* emit setup at begin of new cmdstream buffer (don't rely on previous
 * state):
 */
void
fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- struct fd_context *ctx = batch->ctx;
- struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct fd_context *ctx = batch->ctx;
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
- OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
- OUT_RING(ring, 0x00000001);
+ OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
+ OUT_RING(ring, 0x00000001);
- OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_SP_MODE_CONTROL, 1);
- OUT_RING(ring, 0x00000006);
+ OUT_PKT0(ring, REG_A4XX_SP_MODE_CONTROL, 1);
+ OUT_RING(ring, 0x00000006);
- OUT_PKT0(ring, REG_A4XX_TPL1_TP_MODE_CONTROL, 1);
- OUT_RING(ring, 0x0000003a);
+ OUT_PKT0(ring, REG_A4XX_TPL1_TP_MODE_CONTROL, 1);
+ OUT_RING(ring, 0x0000003a);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
- OUT_RING(ring, 0x00000001);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
+ OUT_RING(ring, 0x00000001);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
- OUT_RING(ring, 0x00000007);
+ OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
+ OUT_RING(ring, 0x00000007);
- OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000012);
+ OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000012);
- OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
- OUT_RING(ring, 0x00000006);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
+ OUT_RING(ring, 0x00000006);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
- OUT_RING(ring, 0x00040000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
+ OUT_RING(ring, 0x00040000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
- OUT_RING(ring, 0x00001000);
+ OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+ OUT_RING(ring, 0x00001000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
- OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) |
- A4XX_RB_BLEND_RED_FLOAT(0.0));
- OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(0) |
- A4XX_RB_BLEND_GREEN_FLOAT(0.0));
- OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(0) |
- A4XX_RB_BLEND_BLUE_FLOAT(0.0));
- OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(0x7fff) |
- A4XX_RB_BLEND_ALPHA_FLOAT(1.0));
+ OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
+ OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) | A4XX_RB_BLEND_RED_FLOAT(0.0));
+ OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(0) | A4XX_RB_BLEND_GREEN_FLOAT(0.0));
+ OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(0) | A4XX_RB_BLEND_BLUE_FLOAT(0.0));
+ OUT_RING(ring,
+ A4XX_RB_BLEND_ALPHA_UINT(0x7fff) | A4XX_RB_BLEND_ALPHA_FLOAT(1.0));
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
- OUT_RING(ring, 0x0000001d);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
+ OUT_RING(ring, 0x0000001d);
- OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
- OUT_RING(ring, 0x00000001);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
+ OUT_RING(ring, 0x00000001);
- OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1);
- OUT_RING(ring, A4XX_TPL1_TP_TEX_COUNT_VS(16) |
- A4XX_TPL1_TP_TEX_COUNT_HS(0) |
- A4XX_TPL1_TP_TEX_COUNT_DS(0) |
- A4XX_TPL1_TP_TEX_COUNT_GS(0));
+ OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1);
+ OUT_RING(ring, A4XX_TPL1_TP_TEX_COUNT_VS(16) | A4XX_TPL1_TP_TEX_COUNT_HS(0) |
+ A4XX_TPL1_TP_TEX_COUNT_DS(0) |
+ A4XX_TPL1_TP_TEX_COUNT_GS(0));
- OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1);
- OUT_RING(ring, 16);
+ OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1);
+ OUT_RING(ring, 16);
- /* we don't use this yet.. probably best to disable.. */
- OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
- OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
- CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
- CP_SET_DRAW_STATE__0_GROUP_ID(0));
- OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+ /* we don't use this yet.. probably best to disable.. */
+ OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
+ OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
+ CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+ CP_SET_DRAW_STATE__0_GROUP_ID(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
- OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
- OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_PARAM */
- OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR */
+ OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
+ OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_PARAM */
+ OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0, 0, 0); /* SP_VS_PVT_MEM_ADDR */
- OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
- OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_PARAM */
- OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR */
+ OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
+ OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_PARAM */
+ OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0, 0, 0); /* SP_FS_PVT_MEM_ADDR */
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
- A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+ A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
- OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
- A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));
+ OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
+ A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));
- OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
- OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
- A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
+ OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
+ A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
- OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
+ OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
- OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
- OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
+ OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
+ OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
- OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+ OUT_RING(ring, 0x0);
- fd_hw_query_enable(batch, ring);
+ fd_hw_query_enable(batch, ring);
}
static void
fd4_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
- unsigned dst_off, struct pipe_resource *src, unsigned src_off,
- unsigned sizedwords)
+ unsigned dst_off, struct pipe_resource *src, unsigned src_off,
+ unsigned sizedwords)
{
- struct fd_bo *src_bo = fd_resource(src)->bo;
- struct fd_bo *dst_bo = fd_resource(dst)->bo;
- unsigned i;
-
- for (i = 0; i < sizedwords; i++) {
- OUT_PKT3(ring, CP_MEM_TO_MEM, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RELOC(ring, dst_bo, dst_off, 0, 0);
- OUT_RELOC(ring, src_bo, src_off, 0, 0);
-
- dst_off += 4;
- src_off += 4;
- }
+ struct fd_bo *src_bo = fd_resource(src)->bo;
+ struct fd_bo *dst_bo = fd_resource(dst)->bo;
+ unsigned i;
+
+ for (i = 0; i < sizedwords; i++) {
+ OUT_PKT3(ring, CP_MEM_TO_MEM, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RELOC(ring, dst_bo, dst_off, 0, 0);
+ OUT_RELOC(ring, src_bo, src_off, 0, 0);
+
+ dst_off += 4;
+ src_off += 4;
+ }
}
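/* Each CP_MEM_TO_MEM packet above copies one 32-bit dword on the GPU,
 * with both offsets advancing 4 bytes per iteration; the CPU-side
 * analogue (illustration only) is simply:
 */
static void
mem_to_mem_cpu(uint32_t *dst, const uint32_t *src, unsigned sizedwords)
{
   for (unsigned i = 0; i < sizedwords; i++)
      dst[i] = src[i];
}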
void
fd4_emit_init_screen(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- screen->emit_ib = fd4_emit_ib;
- screen->mem_to_mem = fd4_mem_to_mem;
+ screen->emit_ib = fd4_emit_ib;
+ screen->mem_to_mem = fd4_mem_to_mem;
}
#include "pipe/p_context.h"
-#include "freedreno_context.h"
#include "fd4_format.h"
#include "fd4_program.h"
+#include "freedreno_context.h"
#include "ir3_gallium.h"
struct fd_ringbuffer;
-void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
- unsigned nr_bufs, struct pipe_surface **bufs);
+void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
+ struct pipe_surface **bufs);
/* grouped together emit-state for prog/vertex/state emit: */
struct fd4_emit {
- struct pipe_debug_callback *debug;
- const struct fd_vertex_state *vtx;
- const struct fd4_program_state *prog;
- const struct pipe_draw_info *info;
- const struct pipe_draw_indirect_info *indirect;
- const struct pipe_draw_start_count *draw;
- bool binning_pass;
- struct ir3_cache_key key;
- enum fd_dirty_3d_state dirty;
-
- uint32_t sprite_coord_enable; /* bitmask */
- bool sprite_coord_mode;
- bool rasterflat;
- bool no_decode_srgb;
- bool skip_consts;
-
- /* cached to avoid repeated lookups of same variants: */
- const struct ir3_shader_variant *vs, *fs;
- /* TODO: other shader stages.. */
+ struct pipe_debug_callback *debug;
+ const struct fd_vertex_state *vtx;
+ const struct fd4_program_state *prog;
+ const struct pipe_draw_info *info;
+ const struct pipe_draw_indirect_info *indirect;
+ const struct pipe_draw_start_count *draw;
+ bool binning_pass;
+ struct ir3_cache_key key;
+ enum fd_dirty_3d_state dirty;
+
+ uint32_t sprite_coord_enable; /* bitmask */
+ bool sprite_coord_mode;
+ bool rasterflat;
+ bool no_decode_srgb;
+ bool skip_consts;
+
+ /* cached to avoid repeated lookups of same variants: */
+ const struct ir3_shader_variant *vs, *fs;
+ /* TODO: other shader stages.. */
};
-static inline enum a4xx_color_fmt fd4_emit_format(struct pipe_surface *surf)
+static inline enum a4xx_color_fmt
+fd4_emit_format(struct pipe_surface *surf)
{
- if (!surf)
- return 0;
- return fd4_pipe2color(surf->format);
+ if (!surf)
+ return 0;
+ return fd4_pipe2color(surf->format);
}
static inline const struct ir3_shader_variant *
fd4_emit_get_vp(struct fd4_emit *emit)
{
- if (!emit->vs) {
- emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs;
- }
- return emit->vs;
+ if (!emit->vs) {
+ emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs;
+ }
+ return emit->vs;
}
static inline const struct ir3_shader_variant *
fd4_emit_get_fp(struct fd4_emit *emit)
{
- if (!emit->fs) {
- if (emit->binning_pass) {
- /* use dummy stateobj to simplify binning vs non-binning: */
- static const struct ir3_shader_variant binning_fs = {};
- emit->fs = &binning_fs;
- } else {
- emit->fs = emit->prog->fs;
- }
- }
- return emit->fs;
+ if (!emit->fs) {
+ if (emit->binning_pass) {
+ /* use dummy stateobj to simplify binning vs non-binning: */
+ static const struct ir3_shader_variant binning_fs = {};
+ emit->fs = &binning_fs;
+ } else {
+ emit->fs = emit->prog->fs;
+ }
+ }
+ return emit->fs;
}
-void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) assert_dt;
+void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring,
+ struct fd4_emit *emit) assert_dt;
void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd4_emit *emit) assert_dt;
+ struct fd4_emit *emit) assert_dt;
-void fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;
+void fd4_emit_restore(struct fd_batch *batch,
+ struct fd_ringbuffer *ring) assert_dt;
void fd4_emit_init_screen(struct pipe_screen *pscreen);
void fd4_emit_init(struct pipe_context *pctx);
static inline void
fd4_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
- __OUT_IB(ring, true, target);
+ __OUT_IB(ring, true, target);
}
#endif /* FD4_EMIT_H */
#include "fd4_format.h"
-
/* Specifies the table of all the formats and their features. Also supplies
* the helpers that look up various data in those tables.
*/
struct fd4_format {
- enum a4xx_vtx_fmt vtx;
- enum a4xx_tex_fmt tex;
- enum a4xx_color_fmt rb;
- enum a3xx_color_swap swap;
- boolean present;
+ enum a4xx_vtx_fmt vtx;
+ enum a4xx_tex_fmt tex;
+ enum a4xx_color_fmt rb;
+ enum a3xx_color_swap swap;
+ boolean present;
};
/* vertex + texture */
-#define VT(pipe, fmt, rbfmt, swapfmt) \
- [PIPE_FORMAT_ ## pipe] = { \
- .present = 1, \
- .vtx = VFMT4_ ## fmt, \
- .tex = TFMT4_ ## fmt, \
- .rb = RB4_ ## rbfmt, \
- .swap = swapfmt \
- }
+#define VT(pipe, fmt, rbfmt, swapfmt) \
+ [PIPE_FORMAT_##pipe] = {.present = 1, \
+ .vtx = VFMT4_##fmt, \
+ .tex = TFMT4_##fmt, \
+ .rb = RB4_##rbfmt, \
+ .swap = swapfmt}
/* texture-only */
-#define _T(pipe, fmt, rbfmt, swapfmt) \
- [PIPE_FORMAT_ ## pipe] = { \
- .present = 1, \
- .vtx = VFMT4_NONE, \
- .tex = TFMT4_ ## fmt, \
- .rb = RB4_ ## rbfmt, \
- .swap = swapfmt \
- }
+#define _T(pipe, fmt, rbfmt, swapfmt) \
+ [PIPE_FORMAT_##pipe] = {.present = 1, \
+ .vtx = VFMT4_NONE, \
+ .tex = TFMT4_##fmt, \
+ .rb = RB4_##rbfmt, \
+ .swap = swapfmt}
/* vertex-only */
-#define V_(pipe, fmt, rbfmt, swapfmt) \
- [PIPE_FORMAT_ ## pipe] = { \
- .present = 1, \
- .vtx = VFMT4_ ## fmt, \
- .tex = TFMT4_NONE, \
- .rb = RB4_ ## rbfmt, \
- .swap = swapfmt \
- }
+#define V_(pipe, fmt, rbfmt, swapfmt) \
+ [PIPE_FORMAT_##pipe] = {.present = 1, \
+ .vtx = VFMT4_##fmt, \
+ .tex = TFMT4_NONE, \
+ .rb = RB4_##rbfmt, \
+ .swap = swapfmt}
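/* Hedged illustration of what these initializer macros expand to (the
 * actual rows live in the elided table below); a typical entry such as
 * VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX) becomes:
 *
 *    [PIPE_FORMAT_R8G8B8A8_UNORM] = {.present = 1,
 *                                    .vtx = VFMT4_8_8_8_8_UNORM,
 *                                    .tex = TFMT4_8_8_8_8_UNORM,
 *                                    .rb = RB4_R8G8B8A8_UNORM,
 *                                    .swap = WZYX},
 */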
/* clang-format off */
static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
enum a4xx_vtx_fmt
fd4_pipe2vtx(enum pipe_format format)
{
- if (!formats[format].present)
- return VFMT4_NONE;
- return formats[format].vtx;
+ if (!formats[format].present)
+ return VFMT4_NONE;
+ return formats[format].vtx;
}
/* convert pipe format to texture sampler format: */
enum a4xx_tex_fmt
fd4_pipe2tex(enum pipe_format format)
{
- if (!formats[format].present)
- return TFMT4_NONE;
- return formats[format].tex;
+ if (!formats[format].present)
+ return TFMT4_NONE;
+ return formats[format].tex;
}
/* convert pipe format to MRT / copydest format used for render-target: */
enum a4xx_color_fmt
fd4_pipe2color(enum pipe_format format)
{
- if (!formats[format].present)
- return RB4_NONE;
- return formats[format].rb;
+ if (!formats[format].present)
+ return RB4_NONE;
+ return formats[format].rb;
}
enum a3xx_color_swap
fd4_pipe2swap(enum pipe_format format)
{
- if (!formats[format].present)
- return WZYX;
- return formats[format].swap;
+ if (!formats[format].present)
+ return WZYX;
+ return formats[format].swap;
}
enum a4xx_depth_format
fd4_pipe2depth(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_Z16_UNORM:
- return DEPTH4_16;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- case PIPE_FORMAT_X8Z24_UNORM:
- case PIPE_FORMAT_S8_UINT_Z24_UNORM:
- return DEPTH4_24_8;
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- return DEPTH4_32;
- default:
- return ~0;
- }
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ return DEPTH4_16;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ return DEPTH4_24_8;
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return DEPTH4_32;
+ default:
+ return ~0;
+ }
}
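/* Usage example (illustration only): combined depth/stencil maps to the
 * 24_8 layout, while an unsupported format returns ~0:
 */
enum a4xx_depth_format zfmt = fd4_pipe2depth(PIPE_FORMAT_Z24_UNORM_S8_UINT);
/* zfmt == DEPTH4_24_8 */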
static inline enum a4xx_tex_swiz
tex_swiz(unsigned swiz)
{
- switch (swiz) {
- default:
- case PIPE_SWIZZLE_X: return A4XX_TEX_X;
- case PIPE_SWIZZLE_Y: return A4XX_TEX_Y;
- case PIPE_SWIZZLE_Z: return A4XX_TEX_Z;
- case PIPE_SWIZZLE_W: return A4XX_TEX_W;
- case PIPE_SWIZZLE_0: return A4XX_TEX_ZERO;
- case PIPE_SWIZZLE_1: return A4XX_TEX_ONE;
- }
+ switch (swiz) {
+ default:
+ case PIPE_SWIZZLE_X:
+ return A4XX_TEX_X;
+ case PIPE_SWIZZLE_Y:
+ return A4XX_TEX_Y;
+ case PIPE_SWIZZLE_Z:
+ return A4XX_TEX_Z;
+ case PIPE_SWIZZLE_W:
+ return A4XX_TEX_W;
+ case PIPE_SWIZZLE_0:
+ return A4XX_TEX_ZERO;
+ case PIPE_SWIZZLE_1:
+ return A4XX_TEX_ONE;
+ }
}
uint32_t
fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
- unsigned swizzle_b, unsigned swizzle_a)
+ unsigned swizzle_b, unsigned swizzle_a)
{
- const struct util_format_description *desc =
- util_format_description(format);
- unsigned char swiz[4] = {
- swizzle_r, swizzle_g, swizzle_b, swizzle_a,
- }, rswiz[4];
-
- util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
-
- return A4XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
- A4XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
- A4XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
- A4XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
+ const struct util_format_description *desc = util_format_description(format);
+ unsigned char swiz[4] =
+ {
+ swizzle_r,
+ swizzle_g,
+ swizzle_b,
+ swizzle_a,
+ },
+ rswiz[4];
+
+ util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
+
+ return A4XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
+ A4XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
+ A4XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
+ A4XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
}
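/* Concrete (hedged) example of the composition: PIPE_FORMAT_R8_UNORM
 * carries the format swizzle (X, 0, 0, 1), so an identity view swizzle
 * composes to:
 */
uint32_t swiz = fd4_tex_swiz(PIPE_FORMAT_R8_UNORM, PIPE_SWIZZLE_X,
                             PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);
/* == A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_X) |
 *    A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_ZERO) |
 *    A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_ZERO) |
 *    A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_ONE) */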
enum a4xx_depth_format fd4_pipe2depth(enum pipe_format format);
uint32_t fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r,
- unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+ unsigned swizzle_g, unsigned swizzle_b,
+ unsigned swizzle_a);
#endif /* FD4_UTIL_H_ */
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "freedreno_draw.h"
-#include "freedreno_state.h"
#include "freedreno_resource.h"
+#include "freedreno_state.h"
-#include "fd4_gmem.h"
#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_emit.h"
-#include "fd4_program.h"
#include "fd4_format.h"
+#include "fd4_gmem.h"
+#include "fd4_program.h"
#include "fd4_zsa.h"
static void
-fd4_gmem_emit_set_prog(struct fd_context *ctx, struct fd4_emit *emit, struct fd_program_stateobj *prog)
+fd4_gmem_emit_set_prog(struct fd_context *ctx, struct fd4_emit *emit,
+ struct fd_program_stateobj *prog)
{
- emit->skip_consts = true;
- emit->key.vs = prog->vs;
- emit->key.fs = prog->fs;
- emit->prog = fd4_program_state(ir3_cache_lookup(ctx->shader_cache, &emit->key, &ctx->debug));
- /* reset the fd4_emit_get_*p cache */
- emit->vs = NULL;
- emit->fs = NULL;
+ emit->skip_consts = true;
+ emit->key.vs = prog->vs;
+ emit->key.fs = prog->fs;
+ emit->prog = fd4_program_state(
+ ir3_cache_lookup(ctx->shader_cache, &emit->key, &ctx->debug));
+ /* reset the fd4_emit_get_*p cache */
+ emit->vs = NULL;
+ emit->fs = NULL;
}
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
- struct pipe_surface **bufs, const uint32_t *bases,
- uint32_t bin_w, bool decode_srgb)
+ struct pipe_surface **bufs, const uint32_t *bases, uint32_t bin_w,
+ bool decode_srgb)
{
- enum a4xx_tile_mode tile_mode;
- unsigned i;
-
- if (bin_w) {
- tile_mode = 2;
- } else {
- tile_mode = TILE4_LINEAR;
- }
-
- for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
- enum a4xx_color_fmt format = 0;
- enum a3xx_color_swap swap = WZYX;
- bool srgb = false;
- struct fd_resource *rsc = NULL;
- uint32_t stride = 0;
- uint32_t base = 0;
- uint32_t offset = 0;
-
- if ((i < nr_bufs) && bufs[i]) {
- struct pipe_surface *psurf = bufs[i];
- enum pipe_format pformat = psurf->format;
-
- rsc = fd_resource(psurf->texture);
-
- /* In case we're drawing to Z32F_S8, the "color" actually goes to
- * the stencil
- */
- if (rsc->stencil) {
- rsc = rsc->stencil;
- pformat = rsc->b.b.format;
- if (bases)
- bases++;
- }
-
- format = fd4_pipe2color(pformat);
- swap = fd4_pipe2swap(pformat);
-
- if (decode_srgb)
- srgb = util_format_is_srgb(pformat);
- else
- pformat = util_format_linear(pformat);
-
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
-
- offset = fd_resource_offset(rsc, psurf->u.tex.level,
- psurf->u.tex.first_layer);
-
- if (bin_w) {
- stride = bin_w << fdl_cpp_shift(&rsc->layout);
-
- if (bases) {
- base = bases[i];
- }
- } else {
- stride = fd_resource_pitch(rsc, psurf->u.tex.level);
- }
- } else if ((i < nr_bufs) && bases) {
- base = bases[i];
- }
-
- OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
- OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
- A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
- A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
- A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
- COND(srgb, A4XX_RB_MRT_BUF_INFO_COLOR_SRGB));
- if (bin_w || (i >= nr_bufs) || !bufs[i]) {
- OUT_RING(ring, base);
- OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
- } else {
- OUT_RELOC(ring, rsc->bo, offset, 0, 0);
- /* RB_MRT[i].CONTROL3.STRIDE not emitted by c2d..
- * not sure if we need to skip it for bypass or
- * not.
- */
- OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(0));
- }
- }
+ enum a4xx_tile_mode tile_mode;
+ unsigned i;
+
+ if (bin_w) {
+ tile_mode = 2;
+ } else {
+ tile_mode = TILE4_LINEAR;
+ }
+
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ enum a4xx_color_fmt format = 0;
+ enum a3xx_color_swap swap = WZYX;
+ bool srgb = false;
+ struct fd_resource *rsc = NULL;
+ uint32_t stride = 0;
+ uint32_t base = 0;
+ uint32_t offset = 0;
+
+ if ((i < nr_bufs) && bufs[i]) {
+ struct pipe_surface *psurf = bufs[i];
+ enum pipe_format pformat = psurf->format;
+
+ rsc = fd_resource(psurf->texture);
+
+ /* In case we're drawing to Z32F_S8, the "color" actually goes to
+ * the stencil
+ */
+ if (rsc->stencil) {
+ rsc = rsc->stencil;
+ pformat = rsc->b.b.format;
+ if (bases)
+ bases++;
+ }
+
+ format = fd4_pipe2color(pformat);
+ swap = fd4_pipe2swap(pformat);
+
+ if (decode_srgb)
+ srgb = util_format_is_srgb(pformat);
+ else
+ pformat = util_format_linear(pformat);
+
+ debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+ offset = fd_resource_offset(rsc, psurf->u.tex.level,
+ psurf->u.tex.first_layer);
+
+ if (bin_w) {
+ stride = bin_w << fdl_cpp_shift(&rsc->layout);
+
+ if (bases) {
+ base = bases[i];
+ }
+ } else {
+ stride = fd_resource_pitch(rsc, psurf->u.tex.level);
+ }
+ } else if ((i < nr_bufs) && bases) {
+ base = bases[i];
+ }
+
+ OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
+ OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
+ A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
+ A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
+ A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
+ COND(srgb, A4XX_RB_MRT_BUF_INFO_COLOR_SRGB));
+ if (bin_w || (i >= nr_bufs) || !bufs[i]) {
+ OUT_RING(ring, base);
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
+ } else {
+ OUT_RELOC(ring, rsc->bo, offset, 0, 0);
+ /* RB_MRT[i].CONTROL3.STRIDE not emitted by c2d..
+ * not sure if we need to skip it for bypass or
+ * not.
+ */
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(0));
+ }
+ }
}
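/* Worked example for the binning-path stride above (illustration,
 * assuming fdl_cpp_shift() returns log2 of the bytes-per-pixel):
 */
uint32_t stride = 256 << 2; /* bin_w=256, RGBA8 cpp=4 -> 1024 byte rows */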
static bool
use_hw_binning(struct fd_batch *batch)
{
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- if ((gmem->maxpw * gmem->maxph) > 32)
- return false;
+ if ((gmem->maxpw * gmem->maxph) > 32)
+ return false;
- if ((gmem->maxpw > 15) || (gmem->maxph > 15))
- return false;
+ if ((gmem->maxpw > 15) || (gmem->maxph > 15))
+ return false;
- return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
+ return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
}
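/* The same predicate restated standalone (sketch; parameter names are
 * made up, the values come from the gmem state object): each visibility
 * pipe may cover at most 32 bins, at most 15 in either dimension, and
 * binning only pays off with more than two bins total.
 */
#include <stdbool.h>

static bool
would_use_hw_binning(unsigned maxpw, unsigned maxph, unsigned nbins_x,
                     unsigned nbins_y, bool binning_enabled)
{
   if ((maxpw * maxph) > 32)
      return false;
   if ((maxpw > 15) || (maxph > 15))
      return false;
   return binning_enabled && ((nbins_x * nbins_y) > 2);
}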
/* transfer from gmem to system memory (ie. normal RAM) */
static void
-emit_gmem2mem_surf(struct fd_batch *batch, bool stencil,
- uint32_t base, struct pipe_surface *psurf)
+emit_gmem2mem_surf(struct fd_batch *batch, bool stencil, uint32_t base,
+ struct pipe_surface *psurf)
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct fd_resource *rsc = fd_resource(psurf->texture);
- enum pipe_format pformat = psurf->format;
- uint32_t offset, pitch;
-
- if (!rsc->valid)
- return;
-
- if (stencil) {
- debug_assert(rsc->stencil);
- rsc = rsc->stencil;
- pformat = rsc->b.b.format;
- }
-
- offset = fd_resource_offset(rsc, psurf->u.tex.level,
- psurf->u.tex.first_layer);
- pitch = fd_resource_pitch(rsc, psurf->u.tex.level);
-
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
-
- OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4);
- OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
- A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) |
- A4XX_RB_COPY_CONTROL_GMEM_BASE(base));
- OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
- OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(pitch));
- OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
- A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(pformat)) |
- A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
- A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
- A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(pformat)));
-
- fd4_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
- DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX4_SIZE_8_BIT, 0, 0, NULL);
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+ enum pipe_format pformat = psurf->format;
+ uint32_t offset, pitch;
+
+ if (!rsc->valid)
+ return;
+
+ if (stencil) {
+ debug_assert(rsc->stencil);
+ rsc = rsc->stencil;
+ pformat = rsc->b.b.format;
+ }
+
+ offset =
+ fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
+ pitch = fd_resource_pitch(rsc, psurf->u.tex.level);
+
+ debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+ OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4);
+ OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
+ A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) |
+ A4XX_RB_COPY_CONTROL_GMEM_BASE(base));
+ OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
+ OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(pitch));
+ OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
+ A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(pformat)) |
+ A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
+ A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
+ A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(pformat)));
+
+ fd4_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX4_SIZE_8_BIT, 0, 0, NULL);
}
static void
-fd4_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
- assert_dt
+fd4_emit_tile_gmem2mem(struct fd_batch *batch,
+ const struct fd_tile *tile) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct fd4_emit emit = {
- .debug = &ctx->debug,
- .vtx = &ctx->solid_vbuf_state,
- };
- fd4_gmem_emit_set_prog(ctx, &emit, &ctx->solid_prog);
-
- OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
-
- OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
- OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
- A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
- A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
- A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
- A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
- A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
- A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
- A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
- OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
-
- OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
- OUT_RING(ring, 0xff000000 |
- A4XX_RB_STENCILREFMASK_STENCILREF(0) |
- A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
- A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
- OUT_RING(ring, 0xff000000 |
- A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
- A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
- A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
- OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
-
- fd_wfi(batch, ring);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
- OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
-
- OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width/2.0));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width/2.0));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height/2.0));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height/2.0));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));
-
- OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
- 0xa); /* XXX */
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
- A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
- A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
-
- OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
- OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
- OUT_RING(ring, 0x00000002);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
- OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
- A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
- OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
- A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
-
- OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
- OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
- OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
-
- fd4_program_emit(ring, &emit, 0, NULL);
- fd4_emit_vertex_bufs(ring, &emit);
-
- if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
- if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
- emit_gmem2mem_surf(batch, false, gmem->zsbuf_base[0], pfb->zsbuf);
- if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
- emit_gmem2mem_surf(batch, true, gmem->zsbuf_base[1], pfb->zsbuf);
- }
-
- if (batch->resolve & FD_BUFFER_COLOR) {
- unsigned i;
- for (i = 0; i < pfb->nr_cbufs; i++) {
- if (!pfb->cbufs[i])
- continue;
- if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
- continue;
- emit_gmem2mem_surf(batch, false, gmem->cbuf_base[i], pfb->cbufs[i]);
- }
- }
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
- A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+ struct fd_context *ctx = batch->ctx;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd4_emit emit = {
+ .debug = &ctx->debug,
+ .vtx = &ctx->solid_vbuf_state,
+ };
+ fd4_gmem_emit_set_prog(ctx, &emit, &ctx->solid_prog);
+
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
+ OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+ A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+ A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+ OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, 0xff000000 | A4XX_RB_STENCILREFMASK_STENCILREF(0) |
+ A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
+ A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, 0xff000000 | A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
+ A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
+ A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
+
+ fd_wfi(batch, ring);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width / 2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width / 2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height / 2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height / 2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | 0xa); /* XXX */
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+ A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+ OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+ OUT_RING(ring, 0x00000002);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
+ OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
+ A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
+ OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+ A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+ OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
+
+ fd4_program_emit(ring, &emit, 0, NULL);
+ fd4_emit_vertex_bufs(ring, &emit);
+
+ if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
+ emit_gmem2mem_surf(batch, false, gmem->zsbuf_base[0], pfb->zsbuf);
+ if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
+ emit_gmem2mem_surf(batch, true, gmem->zsbuf_base[1], pfb->zsbuf);
+ }
+
+ if (batch->resolve & FD_BUFFER_COLOR) {
+ unsigned i;
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ if (!pfb->cbufs[i])
+ continue;
+ if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+ emit_gmem2mem_surf(batch, false, gmem->cbuf_base[i], pfb->cbufs[i]);
+ }
+ }
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+ A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}
/* transfer from system memory to gmem */
static void
emit_mem2gmem_surf(struct fd_batch *batch, const uint32_t *bases,
- struct pipe_surface **bufs, uint32_t nr_bufs, uint32_t bin_w)
+ struct pipe_surface **bufs, uint32_t nr_bufs, uint32_t bin_w)
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_surface *zsbufs[2];
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_surface *zsbufs[2];
- emit_mrt(ring, nr_bufs, bufs, bases, bin_w, false);
+ emit_mrt(ring, nr_bufs, bufs, bases, bin_w, false);
- if (bufs[0] && (bufs[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
- /* The gmem_restore_tex logic will put the first buffer's stencil
- * as color. Supply it with the proper information to make that
- * happen.
- */
- zsbufs[0] = zsbufs[1] = bufs[0];
- bufs = zsbufs;
- nr_bufs = 2;
- }
+ if (bufs[0] && (bufs[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
+ /* The gmem_restore_tex logic will put the first buffer's stencil
+ * as color. Supply it with the proper information to make that
+ * happen.
+ */
+ zsbufs[0] = zsbufs[1] = bufs[0];
+ bufs = zsbufs;
+ nr_bufs = 2;
+ }
- fd4_emit_gmem_restore_tex(ring, nr_bufs, bufs);
+ fd4_emit_gmem_restore_tex(ring, nr_bufs, bufs);
- fd4_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
- DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX4_SIZE_8_BIT, 0, 0, NULL);
+ fd4_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX4_SIZE_8_BIT, 0, 0, NULL);
}
static void
-fd4_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
- assert_dt
+fd4_emit_tile_mem2gmem(struct fd_batch *batch,
+ const struct fd_tile *tile) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct fd4_emit emit = {
- .debug = &ctx->debug,
- .vtx = &ctx->blit_vbuf_state,
- .sprite_coord_enable = 1,
- .no_decode_srgb = true,
- };
- /* NOTE: They all use the same VP, this is for vtx bufs. */
- fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);
-
- unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
- float x0, y0, x1, y1;
- unsigned bin_w = tile->bin_w;
- unsigned bin_h = tile->bin_h;
- unsigned i;
-
- /* write texture coordinates to vertexbuf: */
- x0 = ((float)tile->xoff) / ((float)pfb->width);
- x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
- y0 = ((float)tile->yoff) / ((float)pfb->height);
- y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
-
- OUT_PKT3(ring, CP_MEM_WRITE, 5);
- OUT_RELOC(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
- OUT_RING(ring, fui(x0));
- OUT_RING(ring, fui(y0));
- OUT_RING(ring, fui(x1));
- OUT_RING(ring, fui(y1));
-
- for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
- mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
-
- OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
- A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
-
- OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
- OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
- A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
- A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
- A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
- A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
- A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
- }
-
- OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
- OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
- A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
- A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
- A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
- A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
- A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
- A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
- A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
-
- OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
- OUT_RING(ring, 0x8); /* XXX RB_RENDER_CONTROL */
-
- OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
-
- OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
- OUT_RING(ring, 0x280000); /* XXX GRAS_CL_CLIP_CNTL */
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
- OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0) |
- A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)bin_w/2.0));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)bin_w/2.0));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)bin_h/2.0));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)bin_h/2.0));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
- OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
- OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
- A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
- OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
- A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
- OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
- A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
- OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
- A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
-
- OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
- A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));
-
- OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
- OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
- A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
- A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
- A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
- A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
- A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
- A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
- A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
- OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
- A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
-
- OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
- OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST |
- A4XX_PC_PRIM_VTX_CNTL_VAROUT(1));
-
- OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
- OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
- OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
-
- fd4_emit_vertex_bufs(ring, &emit);
-
- /* for gmem pitch/base calculations, we need to use the non-
- * truncated tile sizes:
- */
- bin_w = gmem->bin_w;
- bin_h = gmem->bin_h;
-
- if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
- fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[pfb->nr_cbufs - 1]);
- fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
- emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
- }
-
- if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- switch (pfb->zsbuf->format) {
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- case PIPE_FORMAT_Z32_FLOAT:
- if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT)
- fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_z);
- else
- fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_zs);
-
- OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
- A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
- A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS) |
- A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
- OUT_RING(ring, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
- OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
-
- break;
- default:
- /* Non-float can use a regular color write. It's split over 8-bit
- * components, so half precision is always sufficient.
- */
- fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);
- break;
- }
- fd4_program_emit(ring, &emit, 1, &pfb->zsbuf);
- emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
- }
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
-
- OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
- A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
- 0x00010000); /* XXX */
+ struct fd_context *ctx = batch->ctx;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd4_emit emit = {
+ .debug = &ctx->debug,
+ .vtx = &ctx->blit_vbuf_state,
+ .sprite_coord_enable = 1,
+ .no_decode_srgb = true,
+ };
+ /* NOTE: They all use the same VP, this is for vtx bufs. */
+ fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);
+
+ unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
+ float x0, y0, x1, y1;
+ unsigned bin_w = tile->bin_w;
+ unsigned bin_h = tile->bin_h;
+ unsigned i;
+
+ /* write texture coordinates to vertexbuf: */
+ x0 = ((float)tile->xoff) / ((float)pfb->width);
+ x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
+ y0 = ((float)tile->yoff) / ((float)pfb->height);
+ y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
+
+ OUT_PKT3(ring, CP_MEM_WRITE, 5);
+ OUT_RELOC(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
+ OUT_RING(ring, fui(x0));
+ OUT_RING(ring, fui(y0));
+ OUT_RING(ring, fui(x1));
+ OUT_RING(ring, fui(y1));
+
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
+
+ OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
+ A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
+
+ OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
+ OUT_RING(
+ ring,
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
+ }
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
+ OUT_RING(ring, 0x8); /* XXX RB_RENDER_CONTROL */
+
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x280000); /* XXX GRAS_CL_CLIP_CNTL */
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0) |
+ A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)bin_w / 2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)bin_w / 2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)bin_h / 2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)bin_h / 2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
+ OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
+ A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
+ OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+ A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+ A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
+ A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
+
+ OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
+ A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
+ OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
+ A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
+ A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+ OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+ A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+ OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST |
+ A4XX_PC_PRIM_VTX_CNTL_VAROUT(1));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+ OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
+
+ fd4_emit_vertex_bufs(ring, &emit);
+
+ /* for gmem pitch/base calculations, we need to use the non-
+ * truncated tile sizes:
+ */
+ bin_w = gmem->bin_w;
+ bin_h = gmem->bin_h;
+
+ if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
+ fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[pfb->nr_cbufs - 1]);
+ fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
+ emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs,
+ bin_w);
+ }
+
+ if (fd_gmem_needs_restore(batch, tile,
+ FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ switch (pfb->zsbuf->format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case PIPE_FORMAT_Z32_FLOAT:
+ if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT)
+ fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_z);
+ else
+ fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_zs);
+
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
+ A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
+ A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS) |
+ A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
+
+ break;
+ default:
+ /* Non-float can use a regular color write. It's split over 8-bit
+ * components, so half precision is always sufficient.
+ */
+ fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);
+ break;
+ }
+ fd4_program_emit(ring, &emit, 1, &pfb->zsbuf);
+ emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
+ }
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
+ A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
+ 0x00010000); /* XXX */
}
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
- unsigned i;
- for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
- struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
- *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
- }
- util_dynarray_clear(&batch->draw_patches);
+ unsigned i;
+ for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
+ *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
+ }
+ util_dynarray_clear(&batch->draw_patches);
}
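patch_draws() is the second half of a two-step mechanism; the recording half lives in the draw path and is not part of this hunk. A hypothetical sketch of that half, using only the fd_cs_patch fields that patch_draws() reads back (`ring->cur`, `draw_initiator`, and the exact call site are assumptions):

/* at draw time, before we know whether hw binning will run: */
struct fd_cs_patch patch = {
   .cs  = ring->cur,       /* the DRAW packet dword to rewrite later */
   .val = draw_initiator,  /* DRAW4(...) with vismode still zero */
};
util_dynarray_append(&batch->draw_patches, struct fd_cs_patch, patch);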
/* for rendering directly to system memory: */
static void
-fd4_emit_sysmem_prep(struct fd_batch *batch)
- assert_dt
+fd4_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd_ringbuffer *ring = batch->gmem;
- fd4_emit_restore(batch, ring);
+ fd4_emit_restore(batch, ring);
- OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
- OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
- A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
+ OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
+ OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
+ A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
- /* setup scissor/offset for current tile: */
- OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
- OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(0) |
- A4XX_RB_BIN_OFFSET_Y(0));
+ /* setup scissor/offset for current tile: */
+ OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
+ OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(0) | A4XX_RB_BIN_OFFSET_Y(0));
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
- OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
- A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
- OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
- A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+ A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
+ A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
- OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(0) |
- A4XX_RB_MODE_CONTROL_HEIGHT(0) |
- 0x00c00000); /* XXX */
+ OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(0) |
+ A4XX_RB_MODE_CONTROL_HEIGHT(0) | 0x00c00000); /* XXX */
- OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
- OUT_RING(ring, 0x8);
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
+ OUT_RING(ring, 0x8);
- patch_draws(batch, IGNORE_VISIBILITY);
+ patch_draws(batch, IGNORE_VISIBILITY);
}
static void
-update_vsc_pipe(struct fd_batch *batch)
- assert_dt
+update_vsc_pipe(struct fd_batch *batch) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd4_context *fd4_ctx = fd4_context(ctx);
- struct fd_ringbuffer *ring = batch->gmem;
- int i;
-
- OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1);
- OUT_RELOC(ring, fd4_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
-
- OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
- for (i = 0; i < 8; i++) {
- const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
- OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
- A4XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
- A4XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
- A4XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
- }
-
- OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
- for (i = 0; i < 8; i++) {
- if (!ctx->vsc_pipe_bo[i]) {
- ctx->vsc_pipe_bo[i] = fd_bo_new(ctx->dev, 0x40000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
- }
- OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i] */
- }
-
- OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
- for (i = 0; i < 8; i++) {
- OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
- }
+ struct fd_context *ctx = batch->ctx;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct fd_ringbuffer *ring = batch->gmem;
+ int i;
+
+ OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1);
+ OUT_RELOC(ring, fd4_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
+
+ OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
+ for (i = 0; i < 8; i++) {
+ const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
+ OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
+ A4XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
+ A4XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
+ A4XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
+ }
+
+ OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
+ for (i = 0; i < 8; i++) {
+ if (!ctx->vsc_pipe_bo[i]) {
+ ctx->vsc_pipe_bo[i] = fd_bo_new(
+ ctx->dev, 0x40000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
+ }
+ OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
+ 0); /* VSC_PIPE_DATA_ADDRESS[i] */
+ }
+
+ OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
+ for (i = 0; i < 8; i++) {
+ OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
+ 32); /* VSC_PIPE_DATA_LENGTH[i] */
+ }
}
static void
-emit_binning_pass(struct fd_batch *batch)
- assert_dt
+emit_binning_pass(struct fd_batch *batch) assert_dt
{
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct fd_ringbuffer *ring = batch->gmem;
- int i;
-
- uint32_t x1 = gmem->minx;
- uint32_t y1 = gmem->miny;
- uint32_t x2 = gmem->minx + gmem->width - 1;
- uint32_t y2 = gmem->miny + gmem->height - 1;
-
- OUT_PKT0(ring, REG_A4XX_PC_BINNING_COMMAND, 1);
- OUT_RING(ring, A4XX_PC_BINNING_COMMAND_BINNING_ENABLE);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
- A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
- A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
-
- OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
- OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
- A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
-
- /* setup scissor/offset for whole screen: */
- OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
- OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(x1) |
- A4XX_RB_BIN_OFFSET_Y(y1));
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
- OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
- A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
- OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
- A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
-
- for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
- OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) |
- A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
- }
-
- /* emit IB to binning drawcmds: */
- fd4_emit_ib(ring, batch->binning);
-
- fd_reset_wfi(batch);
- fd_wfi(batch, ring);
-
- /* and then put stuff back the way it was: */
-
- OUT_PKT0(ring, REG_A4XX_PC_BINNING_COMMAND, 1);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
- OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
- A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
- A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
-
- fd_event_write(batch, ring, CACHE_FLUSH);
- fd_wfi(batch, ring);
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd_ringbuffer *ring = batch->gmem;
+ int i;
+
+ uint32_t x1 = gmem->minx;
+ uint32_t y1 = gmem->miny;
+ uint32_t x2 = gmem->minx + gmem->width - 1;
+ uint32_t y2 = gmem->miny + gmem->height - 1;
+
+ OUT_PKT0(ring, REG_A4XX_PC_BINNING_COMMAND, 1);
+ OUT_RING(ring, A4XX_PC_BINNING_COMMAND_BINNING_ENABLE);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
+ A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+ A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
+ OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
+ A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
+
+ /* setup scissor/offset for whole screen: */
+ OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
+ OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(x1) | A4XX_RB_BIN_OFFSET_Y(y1));
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
+ A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
+ OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
+ A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
+
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) |
+ A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
+ }
+
+ /* emit IB to binning drawcmds: */
+ fd4_emit_ib(ring, batch->binning);
+
+ fd_reset_wfi(batch);
+ fd_wfi(batch, ring);
+
+ /* and then put stuff back the way it was: */
+
+ OUT_PKT0(ring, REG_A4XX_PC_BINNING_COMMAND, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+ A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ fd_event_write(batch, ring, CACHE_FLUSH);
+ fd_wfi(batch, ring);
}
/* before first tile */
static void
-fd4_emit_tile_init(struct fd_batch *batch)
- assert_dt
+fd4_emit_tile_init(struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- fd4_emit_restore(batch, ring);
+ fd4_emit_restore(batch, ring);
- OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
- OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
- A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
+ OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
+ OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
+ A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
- update_vsc_pipe(batch);
+ update_vsc_pipe(batch);
- fd_wfi(batch, ring);
- OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
- OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
- A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
+ fd_wfi(batch, ring);
+ OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
+ OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
+ A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
- if (use_hw_binning(batch)) {
- OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
- A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));
+ if (use_hw_binning(batch)) {
+ OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
+ A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));
- OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_RENDER_CONTROL_BINNING_PASS |
- A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
- 0x8);
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_CONTROL_BINNING_PASS |
+ A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | 0x8);
- /* emit hw binning pass: */
- emit_binning_pass(batch);
+ /* emit hw binning pass: */
+ emit_binning_pass(batch);
- patch_draws(batch, USE_VISIBILITY);
- } else {
- patch_draws(batch, IGNORE_VISIBILITY);
- }
+ patch_draws(batch, USE_VISIBILITY);
+ } else {
+ patch_draws(batch, IGNORE_VISIBILITY);
+ }
- OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
- OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
- A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
- A4XX_RB_MODE_CONTROL_ENABLE_GMEM);
+ OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
+ A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
+ A4XX_RB_MODE_CONTROL_ENABLE_GMEM);
}
/* before mem2gmem */
static void
fd4_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
-
- if (pfb->zsbuf) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
- uint32_t cpp = rsc->layout.cpp;
-
- OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
- OUT_RING(ring, A4XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]) |
- A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format)));
- OUT_RING(ring, A4XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
- OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(cpp * gmem->bin_w));
-
- OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
- if (rsc->stencil) {
- OUT_RING(ring, A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL |
- A4XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
- OUT_RING(ring, A4XX_RB_STENCIL_PITCH(rsc->stencil->layout.cpp * gmem->bin_w));
- } else {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
- } else {
- OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
- OUT_RING(ring, 0); /* RB_STENCIL_INFO */
- OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
- }
-
- OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
- if (pfb->zsbuf) {
- OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
- fd4_pipe2depth(pfb->zsbuf->format)));
- } else {
- OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(DEPTH4_NONE));
- }
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+
+ if (pfb->zsbuf) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ uint32_t cpp = rsc->layout.cpp;
+
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
+ OUT_RING(ring, A4XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]) |
+ A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(
+ fd4_pipe2depth(pfb->zsbuf->format)));
+ OUT_RING(ring, A4XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
+ OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(cpp * gmem->bin_w));
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
+ if (rsc->stencil) {
+ OUT_RING(ring,
+ A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL |
+ A4XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
+ OUT_RING(ring, A4XX_RB_STENCIL_PITCH(rsc->stencil->layout.cpp *
+ gmem->bin_w));
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ } else {
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
+ OUT_RING(ring, 0); /* RB_STENCIL_INFO */
+ OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
+ }
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
+ if (pfb->zsbuf) {
+ OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
+ fd4_pipe2depth(pfb->zsbuf->format)));
+ } else {
+ OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(DEPTH4_NONE));
+ }
}
/* before IB to rendering cmds: */
static void
-fd4_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
- assert_dt
+fd4_emit_tile_renderprep(struct fd_batch *batch,
+ const struct fd_tile *tile) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct fd4_context *fd4_ctx = fd4_context(ctx);
- struct fd_ringbuffer *ring = batch->gmem;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
-
- uint32_t x1 = tile->xoff;
- uint32_t y1 = tile->yoff;
- uint32_t x2 = tile->xoff + tile->bin_w - 1;
- uint32_t y2 = tile->yoff + tile->bin_h - 1;
-
- if (use_hw_binning(batch)) {
- const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
- struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
-
- assert(pipe->w && pipe->h);
-
- fd_event_write(batch, ring, HLSQ_FLUSH);
- fd_wfi(batch, ring);
-
- OUT_PKT0(ring, REG_A4XX_PC_VSTREAM_CONTROL, 1);
- OUT_RING(ring, A4XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
- A4XX_PC_VSTREAM_CONTROL_N(tile->n));
-
- OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
- OUT_RELOC(ring, pipe_bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
- OUT_RELOC(ring, fd4_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */
- (tile->p * 4), 0, 0);
- } else {
- OUT_PKT0(ring, REG_A4XX_PC_VSTREAM_CONTROL, 1);
- OUT_RING(ring, 0x00000000);
- }
-
- OUT_PKT3(ring, CP_SET_BIN, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
- OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
-
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true);
-
- /* setup scissor/offset for current tile: */
- OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
- OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(tile->xoff) |
- A4XX_RB_BIN_OFFSET_Y(tile->yoff));
-
- OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
- OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
- A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
- OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
- A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
-
- OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
- OUT_RING(ring, 0x8);
+ struct fd_context *ctx = batch->ctx;
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct fd_ringbuffer *ring = batch->gmem;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
+ uint32_t x1 = tile->xoff;
+ uint32_t y1 = tile->yoff;
+ uint32_t x2 = tile->xoff + tile->bin_w - 1;
+ uint32_t y2 = tile->yoff + tile->bin_h - 1;
+
+ if (use_hw_binning(batch)) {
+ const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
+ struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
+
+ assert(pipe->w && pipe->h);
+
+ fd_event_write(batch, ring, HLSQ_FLUSH);
+ fd_wfi(batch, ring);
+
+ OUT_PKT0(ring, REG_A4XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, A4XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
+ A4XX_PC_VSTREAM_CONTROL_N(tile->n));
+
+ OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
+ OUT_RELOC(ring, pipe_bo, 0, 0,
+ 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
+ OUT_RELOC(ring, fd4_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <-
+ VSC_SIZE_ADDRESS + (p * 4) */
+ (tile->p * 4), 0, 0);
+ } else {
+ OUT_PKT0(ring, REG_A4XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+ }
+
+ OUT_PKT3(ring, CP_SET_BIN, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
+ OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
+
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w,
+ true);
+
+ /* setup scissor/offset for current tile: */
+ OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
+ OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(tile->xoff) |
+ A4XX_RB_BIN_OFFSET_Y(tile->yoff));
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
+ A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
+ OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
+ A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
+ OUT_RING(ring, 0x8);
}
void
-fd4_gmem_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd4_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
-
- ctx->emit_sysmem_prep = fd4_emit_sysmem_prep;
- ctx->emit_tile_init = fd4_emit_tile_init;
- ctx->emit_tile_prep = fd4_emit_tile_prep;
- ctx->emit_tile_mem2gmem = fd4_emit_tile_mem2gmem;
- ctx->emit_tile_renderprep = fd4_emit_tile_renderprep;
- ctx->emit_tile_gmem2mem = fd4_emit_tile_gmem2mem;
+ struct fd_context *ctx = fd_context(pctx);
+
+ ctx->emit_sysmem_prep = fd4_emit_sysmem_prep;
+ ctx->emit_tile_init = fd4_emit_tile_init;
+ ctx->emit_tile_prep = fd4_emit_tile_prep;
+ ctx->emit_tile_mem2gmem = fd4_emit_tile_mem2gmem;
+ ctx->emit_tile_renderprep = fd4_emit_tile_renderprep;
+ ctx->emit_tile_gmem2mem = fd4_emit_tile_gmem2mem;
}
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "freedreno_program.h"
-#include "fd4_program.h"
#include "fd4_emit.h"
-#include "fd4_texture.h"
#include "fd4_format.h"
+#include "fd4_program.h"
+#include "fd4_texture.h"
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
- const struct ir3_info *si = &so->info;
- enum a4xx_state_block sb = fd4_stage2shadersb(so->type);
- enum a4xx_state_src src;
- uint32_t i, sz, *bin;
-
- if (FD_DBG(DIRECT)) {
- sz = si->sizedwords;
- src = SS4_DIRECT;
- bin = fd_bo_map(so->bo);
- } else {
- sz = 0;
- src = SS4_INDIRECT;
- bin = NULL;
- }
-
- OUT_PKT3(ring, CP_LOAD_STATE4, 2 + sz);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
- CP_LOAD_STATE4_0_STATE_SRC(src) |
- CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE4_0_NUM_UNIT(so->instrlen));
- if (bin) {
- OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER));
- } else {
- OUT_RELOC(ring, so->bo, 0,
- CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0);
- }
-
- /* for how clever coverity is, it is sometimes rather dull, and
- * doesn't realize that the only case where bin==NULL, sz==0:
- */
- assume(bin || (sz == 0));
-
- for (i = 0; i < sz; i++) {
- OUT_RING(ring, bin[i]);
- }
+ const struct ir3_info *si = &so->info;
+ enum a4xx_state_block sb = fd4_stage2shadersb(so->type);
+ enum a4xx_state_src src;
+ uint32_t i, sz, *bin;
+
+ if (FD_DBG(DIRECT)) {
+ sz = si->sizedwords;
+ src = SS4_DIRECT;
+ bin = fd_bo_map(so->bo);
+ } else {
+ sz = 0;
+ src = SS4_INDIRECT;
+ bin = NULL;
+ }
+
+ OUT_PKT3(ring, CP_LOAD_STATE4, 2 + sz);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(src) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(so->instrlen));
+ if (bin) {
+ OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER));
+ } else {
+ OUT_RELOC(ring, so->bo, 0, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0);
+ }
+
+ /* for how clever coverity is, it is sometimes rather dull, and
+ * doesn't realize that the only case where bin==NULL, sz==0:
+ */
+ assume(bin || (sz == 0));
+
+ for (i = 0; i < sz; i++) {
+ OUT_RING(ring, bin[i]);
+ }
}
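For reference, the CP_LOAD_STATE4 layout that emit_shader() builds (a gloss of the code above, not taken from documentation):

/* dword 0:   dst offset 0, state src (SS4_DIRECT or SS4_INDIRECT),
 *            state block for the shader stage, NUM_UNIT = instrlen
 *            (units of 16 instructions)
 * dword 1:   direct   -> ST4_SHADER type, ext src addr 0, and the
 *                        shader binary follows inline (`sz` dwords)
 *            indirect -> a reloc to so->bo, with ST4_SHADER carried
 *                        in the reloc's or-value; the CP fetches the
 *                        binary from memory
 * dwords 2+: the inlined binary, present only in the FD_DBG(DIRECT)
 *            case
 */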
struct stage {
- const struct ir3_shader_variant *v;
- const struct ir3_info *i;
- /* const sizes are in units of 4 * vec4 */
- uint8_t constoff;
- uint8_t constlen;
- /* instr sizes are in units of 16 instructions */
- uint8_t instroff;
- uint8_t instrlen;
+ const struct ir3_shader_variant *v;
+ const struct ir3_info *i;
+ /* const sizes are in units of 4 * vec4 */
+ uint8_t constoff;
+ uint8_t constlen;
+ /* instr sizes are in units of 16 instructions */
+ uint8_t instroff;
+ uint8_t instrlen;
};
-enum {
- VS = 0,
- FS = 1,
- HS = 2,
- DS = 3,
- GS = 4,
- MAX_STAGES
-};
+enum { VS = 0, FS = 1, HS = 2, DS = 3, GS = 4, MAX_STAGES };
static void
setup_stages(struct fd4_emit *emit, struct stage *s)
{
- unsigned i;
-
- s[VS].v = fd4_emit_get_vp(emit);
- s[FS].v = fd4_emit_get_fp(emit);
-
- s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */
-
- for (i = 0; i < MAX_STAGES; i++) {
- if (s[i].v) {
- s[i].i = &s[i].v->info;
- /* constlen is in units of 4 * vec4: */
- assert(s[i].v->constlen % 4 == 0);
- s[i].constlen = s[i].v->constlen / 4;
- /* instrlen is already in units of 16 instr.. although
- * probably we should ditch that and not make the compiler
- * care about instruction group size of a3xx vs a4xx
- */
- s[i].instrlen = s[i].v->instrlen;
- } else {
- s[i].i = NULL;
- s[i].constlen = 0;
- s[i].instrlen = 0;
- }
- }
-
- /* NOTE: at least for gles2, blob partitions VS at bottom of const
- * space and FS taking entire remaining space. We probably don't
- * need to do that the same way, but for now mimic what the blob
- * does to make it easier to diff against register values from blob
- *
- * NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders
- * is run from external memory.
- */
- if ((s[VS].instrlen + s[FS].instrlen) > 64) {
- /* prioritize FS for internal memory: */
- if (s[FS].instrlen < 64) {
- /* if FS can fit, kick VS out to external memory: */
- s[VS].instrlen = 0;
- } else if (s[VS].instrlen < 64) {
- /* otherwise if VS can fit, kick out FS: */
- s[FS].instrlen = 0;
- } else {
- /* neither can fit, run both from external memory: */
- s[VS].instrlen = 0;
- s[FS].instrlen = 0;
- }
- }
- s[VS].constlen = 66;
- s[FS].constlen = 128 - s[VS].constlen;
- s[VS].instroff = 0;
- s[VS].constoff = 0;
- s[FS].instroff = 64 - s[FS].instrlen;
- s[FS].constoff = s[VS].constlen;
- s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
- s[HS].constoff = s[DS].constoff = s[GS].constoff = s[FS].constoff;
+ unsigned i;
+
+ s[VS].v = fd4_emit_get_vp(emit);
+ s[FS].v = fd4_emit_get_fp(emit);
+
+ s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */
+
+ for (i = 0; i < MAX_STAGES; i++) {
+ if (s[i].v) {
+ s[i].i = &s[i].v->info;
+ /* constlen is in units of 4 * vec4: */
+ assert(s[i].v->constlen % 4 == 0);
+ s[i].constlen = s[i].v->constlen / 4;
+ /* instrlen is already in units of 16 instr.. although
+ * probably we should ditch that and not make the compiler
+ * care about instruction group size of a3xx vs a4xx
+ */
+ s[i].instrlen = s[i].v->instrlen;
+ } else {
+ s[i].i = NULL;
+ s[i].constlen = 0;
+ s[i].instrlen = 0;
+ }
+ }
+
+ /* NOTE: at least for gles2, blob partitions VS at bottom of const
+ * space and FS taking entire remaining space. We probably don't
+ * need to do that the same way, but for now mimic what the blob
+ * does to make it easier to diff against register values from blob
+ *
+ * NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders
+ * is run from external memory.
+ */
+ if ((s[VS].instrlen + s[FS].instrlen) > 64) {
+ /* prioritize FS for internal memory: */
+ if (s[FS].instrlen < 64) {
+ /* if FS can fit, kick VS out to external memory: */
+ s[VS].instrlen = 0;
+ } else if (s[VS].instrlen < 64) {
+ /* otherwise if VS can fit, kick out FS: */
+ s[FS].instrlen = 0;
+ } else {
+ /* neither can fit, run both from external memory: */
+ s[VS].instrlen = 0;
+ s[FS].instrlen = 0;
+ }
+ }
+ s[VS].constlen = 66;
+ s[FS].constlen = 128 - s[VS].constlen;
+ s[VS].instroff = 0;
+ s[VS].constoff = 0;
+ s[FS].instroff = 64 - s[FS].instrlen;
+ s[FS].constoff = s[VS].constlen;
+ s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
+ s[HS].constoff = s[DS].constoff = s[GS].constoff = s[FS].constoff;
}
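A worked example of the partitioning above (illustrative numbers, everything else from the code): with s[VS].instrlen = 40 and s[FS].instrlen = 30 (units of 16 instructions), 40 + 30 = 70 > 64, so both shaders cannot stay in internal instruction memory. FS fits on its own (30 < 64), so VS is kicked to external memory (s[VS].instrlen = 0) and FS is placed at instroff = 64 - 30 = 34. The const split is fixed either way: VS gets 66 units of 4*vec4 and FS the remaining 62.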
void
-fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
- int nr, struct pipe_surface **bufs)
+fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr,
+ struct pipe_surface **bufs)
{
- struct stage s[MAX_STAGES];
- uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
- uint32_t face_regid, coord_regid, zwcoord_regid, ij_regid[IJ_COUNT];
- enum a3xx_threadsize fssz;
- int constmode;
- int i, j;
-
- debug_assert(nr <= ARRAY_SIZE(color_regid));
-
- if (emit->binning_pass)
- nr = 0;
-
- setup_stages(emit, s);
-
- fssz = (s[FS].i->double_threadsize) ? FOUR_QUADS : TWO_QUADS;
-
- /* blob seems to always use constmode currently: */
- constmode = 1;
-
- pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
- if (pos_regid == regid(63, 0)) {
- /* hw dislikes when there is no position output, which can
- * happen for transform-feedback vertex shaders. Just tell
- * the hw to use r0.x, with whatever random value is there:
- */
- pos_regid = regid(0, 0);
- }
- posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH);
- psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
- if (s[FS].v->color0_mrt) {
- color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
- color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
- ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
- } else {
- color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
- color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
- color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
- color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
- color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
- color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
- color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
- color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
- }
-
- face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE);
- coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD);
- zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2);
- for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
- ij_regid[i] = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
-
- /* we could probably divide this up into things that need to be
- * emitted if frag-prog is dirty vs if vert-prog is dirty..
- */
-
- OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1);
- OUT_RING(ring, 0x00000003);
-
- OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 5);
- OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) |
- A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
- A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
- /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
- * flush some caches? I think we only need to set those
- * bits if we have updated const or shader..
- */
- A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
- A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
- OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
- A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
- A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid) |
- A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(zwcoord_regid));
- OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) |
- 0x3f3f000 | /* XXX */
- A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid));
- /* XXX left out centroid/sample for now */
- OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) |
- A4XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) |
- A4XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(ij_regid[IJ_PERSP_CENTROID]) |
- A4XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_CENTROID(ij_regid[IJ_LINEAR_CENTROID]));
- OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */
-
- OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5);
- OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) |
- A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
- A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) |
- A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff));
- OUT_RING(ring, A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) |
- A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
- A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) |
- A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff));
- OUT_RING(ring, A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) |
- A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
- A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) |
- A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff));
- OUT_RING(ring, A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) |
- A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
- A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) |
- A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff));
- OUT_RING(ring, A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) |
- A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
- A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) |
- A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff));
-
- OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1);
- OUT_RING(ring, 0x140010 | /* XXX */
- COND(emit->binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS));
-
- OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1);
- OUT_RING(ring, 0x7f | /* XXX */
- COND(s[VS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER) |
- COND(s[FS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER) |
- COND(s[VS].instrlen && s[FS].instrlen,
- A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER));
-
- OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1);
- OUT_RING(ring, s[VS].v->instrlen); /* SP_VS_LENGTH_REG */
-
- OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3);
- OUT_RING(ring, A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
- A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
- A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
- A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
- A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
- A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
- COND(s[VS].v->need_pixlod, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE));
- OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) |
- A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in));
- OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
- A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
- A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(s[FS].v->varying_in));
-
- struct ir3_shader_linkage l = {0};
- ir3_link_shaders(&l, s[VS].v, s[FS].v, false);
-
- for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
- uint32_t reg = 0;
-
- OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1);
-
- reg |= A4XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
- reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
- j++;
-
- reg |= A4XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
- reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
- j++;
-
- OUT_RING(ring, reg);
- }
-
- for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
- uint32_t reg = 0;
-
- OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1);
-
- reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8);
- reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8);
- reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8);
- reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8);
-
- OUT_RING(ring, reg);
- }
-
- OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2);
- OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
- A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff));
- OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
-
- if (emit->binning_pass) {
- OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1);
- OUT_RING(ring, 0x00000000); /* SP_FS_LENGTH_REG */
-
- OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2);
- OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
- COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) |
- A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(0) |
- A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(0) |
- A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
- A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
- A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE);
- OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
- 0x80000000);
-
- OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
- OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
- A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff));
- OUT_RING(ring, 0x00000000);
- } else {
- OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1);
- OUT_RING(ring, s[FS].v->instrlen); /* SP_FS_LENGTH_REG */
-
- OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2);
- OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
- COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) |
- A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
- A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
- A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
- A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
- A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
- COND(s[FS].v->need_pixlod, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
- OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
- 0x80000000 | /* XXX */
- COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) |
- COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING) |
- COND(s[FS].v->fragcoord_compmask != 0, A4XX_SP_FS_CTRL_REG1_FRAGCOORD));
-
- OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
- OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
- A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff));
- OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
- }
-
- OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1);
- OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
- A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff));
-
- OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1);
- OUT_RING(ring, A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
- A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff));
-
- OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1);
- OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
- A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff));
-
- OUT_PKT0(ring, REG_A4XX_GRAS_CNTL, 1);
- OUT_RING(ring,
- CONDREG(face_regid, A4XX_GRAS_CNTL_IJ_PERSP) |
- CONDREG(zwcoord_regid, A4XX_GRAS_CNTL_IJ_PERSP) |
- CONDREG(ij_regid[IJ_PERSP_PIXEL], A4XX_GRAS_CNTL_IJ_PERSP) |
- CONDREG(ij_regid[IJ_LINEAR_PIXEL], A4XX_GRAS_CNTL_IJ_LINEAR) |
- CONDREG(ij_regid[IJ_PERSP_CENTROID], A4XX_GRAS_CNTL_IJ_PERSP));
-
- OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1);
- OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) |
- CONDREG(ij_regid[IJ_PERSP_PIXEL], A4XX_RB_RENDER_CONTROL2_IJ_PERSP_PIXEL) |
- CONDREG(ij_regid[IJ_PERSP_CENTROID], A4XX_RB_RENDER_CONTROL2_IJ_PERSP_CENTROID) |
- CONDREG(ij_regid[IJ_LINEAR_PIXEL], A4XX_RB_RENDER_CONTROL2_SIZE) |
- COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) |
- COND(s[FS].v->fragcoord_compmask != 0,
- A4XX_RB_RENDER_CONTROL2_COORD_MASK(s[FS].v->fragcoord_compmask)));
-
- OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
- OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(nr) |
- COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z));
-
- OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1);
- OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_MRT(nr) |
- COND(s[FS].v->writes_pos, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
- A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
-
- OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
- for (i = 0; i < 8; i++) {
- enum a4xx_color_fmt format = 0;
- bool srgb = false;
- if (i < nr) {
- format = fd4_emit_format(bufs[i]);
- if (bufs[i] && !emit->no_decode_srgb)
- srgb = util_format_is_srgb(bufs[i]->format);
- }
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
- A4XX_SP_FS_MRT_REG_MRTFORMAT(format) |
- COND(srgb, A4XX_SP_FS_MRT_REG_COLOR_SRGB) |
- COND(color_regid[i] & HALF_REG_ID,
- A4XX_SP_FS_MRT_REG_HALF_PRECISION));
- }
-
- if (emit->binning_pass) {
- OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
- OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) |
- 0x40000000 | /* XXX */
- COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
- OUT_RING(ring, 0x00000000);
- } else {
- uint32_t vinterp[8], vpsrepl[8];
-
- memset(vinterp, 0, sizeof(vinterp));
- memset(vpsrepl, 0, sizeof(vpsrepl));
-
- /* looks like we need to do int varyings in the frag
- * shader on a4xx (no flatshade reg? or a420.0 bug?):
- *
- * (sy)(ss)nop
- * (sy)ldlv.u32 r0.x,l[r0.x], 1
- * ldlv.u32 r0.y,l[r0.x+1], 1
- * (ss)bary.f (ei)r63.x, 0, r0.x
- * (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
- * (rpt5)nop
- * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
- *
- * Possibly on later a4xx variants we'll be able to use
- * something like the code below instead of workaround
- * in the shader:
- */
- /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
- for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
- /* NOTE: varyings are packed, so if compmask is 0xb
- * then first, second, and fourth component occupy
- * three consecutive varying slots:
- */
- unsigned compmask = s[FS].v->inputs[j].compmask;
-
- uint32_t inloc = s[FS].v->inputs[j].inloc;
-
- if (s[FS].v->inputs[j].flat ||
- (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
- uint32_t loc = inloc;
-
- for (i = 0; i < 4; i++) {
- if (compmask & (1 << i)) {
- vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
- //flatshade[loc / 32] |= 1 << (loc % 32);
- loc++;
- }
- }
- }
-
- bool coord_mode = emit->sprite_coord_mode;
- if (ir3_point_sprite(s[FS].v, j, emit->sprite_coord_enable, &coord_mode)) {
- /* mask is two 2-bit fields, where:
- * '01' -> S
- * '10' -> T
- * '11' -> 1 - T (flip mode)
- */
- unsigned mask = coord_mode ? 0b1101 : 0b1001;
- uint32_t loc = inloc;
- if (compmask & 0x1) {
- vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x2) {
- vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x4) {
- /* .z <- 0.0f */
- vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x8) {
- /* .w <- 1.0f */
- vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
- loc++;
- }
- }
- }
-
- OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
- OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
- A4XX_VPC_ATTR_THRDASSIGN(1) |
- COND(s[FS].v->total_in > 0, A4XX_VPC_ATTR_ENABLE) |
- 0x40000000 | /* XXX */
- COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
- OUT_RING(ring, A4XX_VPC_PACK_NUMFPNONPOSVAR(s[FS].v->total_in) |
- A4XX_VPC_PACK_NUMNONPOSVSVAR(s[FS].v->total_in));
-
- OUT_PKT0(ring, REG_A4XX_VPC_VARYING_INTERP_MODE(0), 8);
- for (i = 0; i < 8; i++)
- OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
-
- OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8);
- for (i = 0; i < 8; i++)
- OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
- }
-
- if (s[VS].instrlen)
- emit_shader(ring, s[VS].v);
-
- if (!emit->binning_pass)
- if (s[FS].instrlen)
- emit_shader(ring, s[FS].v);
+ struct stage s[MAX_STAGES];
+ uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
+ uint32_t face_regid, coord_regid, zwcoord_regid, ij_regid[IJ_COUNT];
+ enum a3xx_threadsize fssz;
+ int constmode;
+ int i, j;
+
+ debug_assert(nr <= ARRAY_SIZE(color_regid));
+
+ if (emit->binning_pass)
+ nr = 0;
+
+ setup_stages(emit, s);
+
+ fssz = (s[FS].i->double_threadsize) ? FOUR_QUADS : TWO_QUADS;
+
+ /* blob seems to always use constmode currently: */
+ constmode = 1;
+
+ pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
+ if (pos_regid == regid(63, 0)) {
+ /* hw dislikes when there is no position output, which can
+ * happen for transform-feedback vertex shaders. Just tell
+ * the hw to use r0.x, with whatever random value is there:
+ */
+ pos_regid = regid(0, 0);
+ }
+ posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH);
+ psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
+ if (s[FS].v->color0_mrt) {
+ color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
+ color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
+ ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
+ } else {
+ color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
+ color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
+ color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
+ color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
+ color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
+ color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
+ color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
+ color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
+ }
+
+ face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE);
+ coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD);
+ zwcoord_regid =
+ (coord_regid == regid(63, 0)) ? regid(63, 0) : (coord_regid + 2);
+ for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
+ ij_regid[i] = ir3_find_sysval_regid(
+ s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
+
+ /* we could probably divide this up into things that need to be
+ * emitted if frag-prog is dirty vs if vert-prog is dirty..
+ */
+
+ OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1);
+ OUT_RING(ring, 0x00000003);
+
+ OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 5);
+ OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) |
+ A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
+ A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
+ /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
+ * flush some caches? I think we only need to set those
+ * bits if we have updated const or shader..
+ */
+ A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
+ A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
+ OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
+ A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
+ A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid) |
+ A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(zwcoord_regid));
+ OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) |
+ 0x3f3f000 | /* XXX */
+ A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid));
+ /* XXX left out centroid/sample for now */
+ OUT_RING(
+ ring,
+ A4XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) |
+ A4XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) |
+ A4XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(
+ ij_regid[IJ_PERSP_CENTROID]) |
+ A4XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_CENTROID(
+ ij_regid[IJ_LINEAR_CENTROID]));
+ OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */
+
+ OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5);
+ OUT_RING(ring,
+ A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) |
+ A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
+ A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) |
+ A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff));
+ OUT_RING(ring,
+ A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) |
+ A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
+ A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) |
+ A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff));
+ OUT_RING(ring,
+ A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) |
+ A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
+ A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) |
+ A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff));
+ OUT_RING(ring,
+ A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) |
+ A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
+ A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) |
+ A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff));
+ OUT_RING(ring,
+ A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) |
+ A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
+ A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) |
+ A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff));
+
+ OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1);
+ OUT_RING(ring,
+ 0x140010 | /* XXX */
+ COND(emit->binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS));
+
+ OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1);
+ OUT_RING(ring, 0x7f | /* XXX */
+ COND(s[VS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER) |
+ COND(s[FS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER) |
+ COND(s[VS].instrlen && s[FS].instrlen,
+ A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER));
+
+ OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1);
+ OUT_RING(ring, s[VS].v->instrlen); /* SP_VS_LENGTH_REG */
+
+ OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3);
+ OUT_RING(
+ ring,
+ A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
+ A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
+ A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
+ A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
+ A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
+ A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
+ COND(s[VS].v->need_pixlod, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+ OUT_RING(ring,
+ A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) |
+ A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in));
+ OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
+ A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
+ A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(s[FS].v->varying_in));
+
+ struct ir3_shader_linkage l = {0};
+ ir3_link_shaders(&l, s[VS].v, s[FS].v, false);
+
+ for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
+ uint32_t reg = 0;
+
+ OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1);
+
+ reg |= A4XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
+ reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
+ j++;
+
+ reg |= A4XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
+ reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
+ j++;
+
+ OUT_RING(ring, reg);
+ }
+
+ for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
+ uint32_t reg = 0;
+
+ OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1);
+
+ reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8);
+ reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8);
+ reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8);
+ reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8);
+
+ OUT_RING(ring, reg);
+ }
+
+ OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2);
+ OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
+ A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff));
+ OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
+
+ if (emit->binning_pass) {
+ OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1);
+ OUT_RING(ring, 0x00000000); /* SP_FS_LENGTH_REG */
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2);
+ OUT_RING(ring,
+ A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+ COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) |
+ A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(0) |
+ A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(0) |
+ A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
+ A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
+ A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE);
+ OUT_RING(ring,
+ A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) | 0x80000000);
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
+ OUT_RING(ring,
+ A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
+ A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff));
+ OUT_RING(ring, 0x00000000);
+ } else {
+ OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1);
+ OUT_RING(ring, s[FS].v->instrlen); /* SP_FS_LENGTH_REG */
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2);
+ OUT_RING(
+ ring,
+ A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+ COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) |
+ A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
+ A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
+ A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
+ A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
+ A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
+ COND(s[FS].v->need_pixlod, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+ OUT_RING(ring,
+ A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
+ 0x80000000 | /* XXX */
+ COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) |
+ COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING) |
+ COND(s[FS].v->fragcoord_compmask != 0,
+ A4XX_SP_FS_CTRL_REG1_FRAGCOORD));
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
+ OUT_RING(ring,
+ A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
+ A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff));
+ OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
+ }
+
+ OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1);
+ OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
+ A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff));
+
+ OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1);
+ OUT_RING(ring, A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
+ A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff));
+
+ OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1);
+ OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
+ A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff));
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CNTL, 1);
+ OUT_RING(ring,
+ CONDREG(face_regid, A4XX_GRAS_CNTL_IJ_PERSP) |
+ CONDREG(zwcoord_regid, A4XX_GRAS_CNTL_IJ_PERSP) |
+ CONDREG(ij_regid[IJ_PERSP_PIXEL], A4XX_GRAS_CNTL_IJ_PERSP) |
+ CONDREG(ij_regid[IJ_LINEAR_PIXEL], A4XX_GRAS_CNTL_IJ_LINEAR) |
+ CONDREG(ij_regid[IJ_PERSP_CENTROID], A4XX_GRAS_CNTL_IJ_PERSP));
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1);
+ OUT_RING(
+ ring,
+ A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) |
+ CONDREG(ij_regid[IJ_PERSP_PIXEL],
+ A4XX_RB_RENDER_CONTROL2_IJ_PERSP_PIXEL) |
+ CONDREG(ij_regid[IJ_PERSP_CENTROID],
+ A4XX_RB_RENDER_CONTROL2_IJ_PERSP_CENTROID) |
+ CONDREG(ij_regid[IJ_LINEAR_PIXEL], A4XX_RB_RENDER_CONTROL2_SIZE) |
+ COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) |
+ COND(s[FS].v->fragcoord_compmask != 0,
+ A4XX_RB_RENDER_CONTROL2_COORD_MASK(s[FS].v->fragcoord_compmask)));
+
+ OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
+ OUT_RING(ring,
+ A4XX_RB_FS_OUTPUT_REG_MRT(nr) |
+ COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z));
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1);
+ OUT_RING(ring,
+ A4XX_SP_FS_OUTPUT_REG_MRT(nr) |
+ COND(s[FS].v->writes_pos, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
+ A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
+ for (i = 0; i < 8; i++) {
+ enum a4xx_color_fmt format = 0;
+ bool srgb = false;
+ if (i < nr) {
+ format = fd4_emit_format(bufs[i]);
+ if (bufs[i] && !emit->no_decode_srgb)
+ srgb = util_format_is_srgb(bufs[i]->format);
+ }
+ OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
+ A4XX_SP_FS_MRT_REG_MRTFORMAT(format) |
+ COND(srgb, A4XX_SP_FS_MRT_REG_COLOR_SRGB) |
+ COND(color_regid[i] & HALF_REG_ID,
+ A4XX_SP_FS_MRT_REG_HALF_PRECISION));
+ }
+
+ if (emit->binning_pass) {
+ OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
+ OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) | 0x40000000 | /* XXX */
+ COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
+ OUT_RING(ring, 0x00000000);
+ } else {
+ uint32_t vinterp[8], vpsrepl[8];
+
+ memset(vinterp, 0, sizeof(vinterp));
+ memset(vpsrepl, 0, sizeof(vpsrepl));
+
+ /* looks like we need to do int varyings in the frag
+ * shader on a4xx (no flatshade reg? or a420.0 bug?):
+ *
+ * (sy)(ss)nop
+ * (sy)ldlv.u32 r0.x,l[r0.x], 1
+ * ldlv.u32 r0.y,l[r0.x+1], 1
+ * (ss)bary.f (ei)r63.x, 0, r0.x
+ * (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
+ * (rpt5)nop
+ * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
+ *
+ * Possibly on later a4xx variants we'll be able to use
+ * something like the code below instead of workaround
+ * in the shader:
+ */
+ /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
+ for (j = -1;
+ (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count;) {
+ /* NOTE: varyings are packed, so if compmask is 0xb
+ * then first, second, and fourth component occupy
+ * three consecutive varying slots:
+ */
+ unsigned compmask = s[FS].v->inputs[j].compmask;
+
+ uint32_t inloc = s[FS].v->inputs[j].inloc;
+
+ if (s[FS].v->inputs[j].flat ||
+ (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
+ uint32_t loc = inloc;
+
+ for (i = 0; i < 4; i++) {
+ if (compmask & (1 << i)) {
+ vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
+ // flatshade[loc / 32] |= 1 << (loc % 32);
+ loc++;
+ }
+ }
+ }
+
+ bool coord_mode = emit->sprite_coord_mode;
+ if (ir3_point_sprite(s[FS].v, j, emit->sprite_coord_enable,
+ &coord_mode)) {
+ /* mask is two 2-bit fields, where:
+ * '01' -> S
+ * '10' -> T
+ * '11' -> 1 - T (flip mode)
+ */
+ unsigned mask = coord_mode ? 0b1101 : 0b1001;
+ uint32_t loc = inloc;
+ if (compmask & 0x1) {
+ vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x2) {
+ vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x4) {
+ /* .z <- 0.0f */
+ vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x8) {
+ /* .w <- 1.0f */
+ vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
+ loc++;
+ }
+ }
+ }
+
+ OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
+ OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
+ A4XX_VPC_ATTR_THRDASSIGN(1) |
+ COND(s[FS].v->total_in > 0, A4XX_VPC_ATTR_ENABLE) |
+ 0x40000000 | /* XXX */
+ COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
+ OUT_RING(ring, A4XX_VPC_PACK_NUMFPNONPOSVAR(s[FS].v->total_in) |
+ A4XX_VPC_PACK_NUMNONPOSVSVAR(s[FS].v->total_in));
+
+ OUT_PKT0(ring, REG_A4XX_VPC_VARYING_INTERP_MODE(0), 8);
+ for (i = 0; i < 8; i++)
+ OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
+
+ OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+ for (i = 0; i < 8; i++)
+ OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
+ }
+
+ if (s[VS].instrlen)
+ emit_shader(ring, s[VS].v);
+
+ if (!emit->binning_pass)
+ if (s[FS].instrlen)
+ emit_shader(ring, s[FS].v);
}
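/* For reference, the vinterp[]/vpsrepl[] packing in the hunk above boils
 * down to one 2-bit field per scalar varying location, sixteen fields per
 * 32-bit register.  A minimal standalone sketch (set_2bit_field() is a
 * hypothetical helper, not driver API), using encodings inferred from this
 * code: VPC_VARYING_INTERP takes 1 = flat, 2 = force 0.0f, 3 = force 1.0f;
 * VPC_VARYING_PS_REPL takes 1 = sprite S, 2 = T, 3 = 1 - T (flip mode).
 */
#include <stdint.h>

static inline void
set_2bit_field(uint32_t regs[8], unsigned loc, unsigned val)
{
   regs[loc / 16] |= (val & 0x3) << ((loc % 16) * 2);
}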
static struct ir3_program_state *
fd4_program_create(void *data, struct ir3_shader_variant *bs,
- struct ir3_shader_variant *vs,
- struct ir3_shader_variant *hs,
- struct ir3_shader_variant *ds,
- struct ir3_shader_variant *gs,
- struct ir3_shader_variant *fs,
- const struct ir3_shader_key *key)
- in_dt
+ struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
+ struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
+ struct ir3_shader_variant *fs,
+ const struct ir3_shader_key *key) in_dt
{
- struct fd_context *ctx = fd_context(data);
- struct fd4_program_state *state = CALLOC_STRUCT(fd4_program_state);
+ struct fd_context *ctx = fd_context(data);
+ struct fd4_program_state *state = CALLOC_STRUCT(fd4_program_state);
- tc_assert_driver_thread(ctx->tc);
+ tc_assert_driver_thread(ctx->tc);
- state->bs = bs;
- state->vs = vs;
- state->fs = fs;
+ state->bs = bs;
+ state->vs = vs;
+ state->fs = fs;
- return &state->base;
+ return &state->base;
}
static void
fd4_program_destroy(void *data, struct ir3_program_state *state)
{
- struct fd4_program_state *so = fd4_program_state(state);
- free(so);
+ struct fd4_program_state *so = fd4_program_state(state);
+ free(so);
}
static const struct ir3_cache_funcs cache_funcs = {
- .create_state = fd4_program_create,
- .destroy_state = fd4_program_destroy,
+ .create_state = fd4_program_create,
+ .destroy_state = fd4_program_destroy,
};
void
fd4_prog_init(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
- ir3_prog_init(pctx);
- fd_prog_init(pctx);
+ ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
+ ir3_prog_init(pctx);
+ fd_prog_init(pctx);
}
struct fd4_emit;
struct fd4_program_state {
- struct ir3_program_state base;
- struct ir3_shader_variant *bs; /* VS for when emit->binning */
- struct ir3_shader_variant *vs;
- struct ir3_shader_variant *fs; /* FS for when !emit->binning */
+ struct ir3_program_state base;
+ struct ir3_shader_variant *bs; /* VS for when emit->binning */
+ struct ir3_shader_variant *vs;
+ struct ir3_shader_variant *fs; /* FS for when !emit->binning */
};
static inline struct fd4_program_state *
fd4_program_state(struct ir3_program_state *state)
{
- return (struct fd4_program_state *)state;
+ return (struct fd4_program_state *)state;
}
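/* A hedged sketch of how the variants stored above get used: the binning
 * pass runs the bs variant (typically a stripped-down VS) with the fragment
 * shader stubbed out, while the draw pass runs vs + fs.  pick_vs() is
 * hypothetical; the real selection happens in the shared ir3 emit paths.
 */
static inline const struct ir3_shader_variant *
pick_vs(const struct fd4_program_state *state, bool binning_pass)
{
   return binning_pass ? state->bs : state->vs;
}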
-void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
- int nr, struct pipe_surface **bufs);
+void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr,
+ struct pipe_surface **bufs);
void fd4_prog_init(struct pipe_context *pctx);
* Rob Clark <robclark@freedesktop.org>
*/
-#include "freedreno_query_hw.h"
#include "freedreno_context.h"
+#include "freedreno_query_hw.h"
#include "freedreno_util.h"
-#include "fd4_query.h"
#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_format.h"
-
+#include "fd4_query.h"
struct fd_rb_samp_ctrs {
- uint64_t ctr[16];
+ uint64_t ctr[16];
};
/*
static struct fd_hw_sample *
occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- struct fd_hw_sample *samp =
- fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
-
- /* low bits of sample addr should be zero (since they are control
- * flags in RB_SAMPLE_COUNT_CONTROL):
- */
- debug_assert((samp->offset & 0x3) == 0);
-
- /* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
- * HW_QUERY_BASE_REG register:
- */
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A4XX_RB_SAMPLE_COUNT_CONTROL) | 0x80000000);
- OUT_RING(ring, HW_QUERY_BASE_REG);
- OUT_RING(ring, A4XX_RB_SAMPLE_COUNT_CONTROL_COPY |
- samp->offset);
-
- OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, 3);
- OUT_RING(ring, DRAW4(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
- INDEX4_SIZE_32_BIT, USE_VISIBILITY));
- OUT_RING(ring, 1); /* NumInstances */
- OUT_RING(ring, 0); /* NumIndices */
-
- fd_event_write(batch, ring, ZPASS_DONE);
-
- return samp;
+ struct fd_hw_sample *samp =
+ fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
+
+ /* low bits of sample addr should be zero (since they are control
+ * flags in RB_SAMPLE_COUNT_CONTROL):
+ */
+ debug_assert((samp->offset & 0x3) == 0);
+
+ /* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
+ * HW_QUERY_BASE_REG register:
+ */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A4XX_RB_SAMPLE_COUNT_CONTROL) | 0x80000000);
+ OUT_RING(ring, HW_QUERY_BASE_REG);
+ OUT_RING(ring, A4XX_RB_SAMPLE_COUNT_CONTROL_COPY | samp->offset);
+
+ OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, 3);
+ OUT_RING(ring, DRAW4(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
+ INDEX4_SIZE_32_BIT, USE_VISIBILITY));
+ OUT_RING(ring, 1); /* NumInstances */
+ OUT_RING(ring, 0); /* NumIndices */
+
+ fd_event_write(batch, ring, ZPASS_DONE);
+
+ return samp;
}
static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start,
- const struct fd_rb_samp_ctrs *end)
+ const struct fd_rb_samp_ctrs *end)
{
- return end->ctr[0] - start->ctr[0];
+ return end->ctr[0] - start->ctr[0];
}
static void
-occlusion_counter_accumulate_result(struct fd_context *ctx,
- const void *start, const void *end,
- union pipe_query_result *result)
+occlusion_counter_accumulate_result(struct fd_context *ctx, const void *start,
+ const void *end,
+ union pipe_query_result *result)
{
- uint64_t n = count_samples(start, end);
- result->u64 += n;
+ uint64_t n = count_samples(start, end);
+ result->u64 += n;
}
static void
-occlusion_predicate_accumulate_result(struct fd_context *ctx,
- const void *start, const void *end,
- union pipe_query_result *result)
+occlusion_predicate_accumulate_result(struct fd_context *ctx, const void *start,
+ const void *end,
+ union pipe_query_result *result)
{
- uint64_t n = count_samples(start, end);
- result->b |= (n > 0);
+ uint64_t n = count_samples(start, end);
+ result->b |= (n > 0);
}
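/* A standalone toy (not driver code) showing why the accumulate_result()
 * callbacks above use "+=" and "|=": with tiled rendering every tile gets
 * its own start/end snapshot of ctr[0], and the callback runs once per
 * tile, so the total is the sum (or OR) of per-tile deltas.  The counter
 * values here are made up:
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint64_t start[] = {100, 240, 500}; /* per-tile begin snapshots */
   uint64_t end[] = {140, 240, 750};   /* per-tile end snapshots   */
   uint64_t total = 0;
   for (int i = 0; i < 3; i++)
      total += end[i] - start[i]; /* mirrors count_samples() */
   printf("samples passed: %" PRIu64 "\n", total); /* prints 290 */
   return 0;
}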
/*
*/
static void
-time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
- assert_dt
+time_elapsed_enable(struct fd_context *ctx,
+ struct fd_ringbuffer *ring) assert_dt
{
- /* Right now, the assignment of countable to counter register is
- * just hard coded. If we start exposing more countables than we
- * have counters, we will need to be more clever.
- */
- struct fd_batch *batch = fd_context_batch_locked(ctx);
- fd_wfi(batch, ring);
- OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
- OUT_RING(ring, CP_ALWAYS_COUNT);
- fd_batch_unlock_submit(batch);
- fd_batch_reference(&batch, NULL);
+ /* Right now, the assignment of countable to counter register is
+ * just hard coded. If we start exposing more countables than we
+ * have counters, we will need to be more clever.
+ */
+ struct fd_batch *batch = fd_context_batch_locked(ctx);
+ fd_wfi(batch, ring);
+ OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
+ OUT_RING(ring, CP_ALWAYS_COUNT);
+ fd_batch_unlock_submit(batch);
+ fd_batch_reference(&batch, NULL);
}
static struct fd_hw_sample *
-time_elapsed_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
- assert_dt
+time_elapsed_get_sample(struct fd_batch *batch,
+ struct fd_ringbuffer *ring) assert_dt
{
- struct fd_hw_sample *samp = fd_hw_sample_init(batch, sizeof(uint64_t));
-
- /* use unused part of vsc_size_mem as scratch space, to avoid
- * extra allocation:
- */
- struct fd_bo *scratch_bo = fd4_context(batch->ctx)->vsc_size_mem;
- const int sample_off = 128;
- const int addr_off = sample_off + 8;
-
- debug_assert(batch->ctx->screen->max_freq > 0);
-
- /* Basic issue is that we need to read counter value to a relative
- * destination (with per-tile offset) rather than absolute dest
- * addr. But there is no pm4 packet that can do that. This is
- * where it would be *really* nice if we could write our own fw
- * since afaict implementing the sort of packet we need would be
- * trivial.
- *
- * Instead, we:
- * (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer
- * (2) CP_MEM_WRITE to write per-sample offset to scratch buffer
- * (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base
- * address to the per-sample offset in the scratch buffer
- * (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3
- * to CP_ME_NRT_ADDR
- * (5) CP_MEM_TO_REG's to copy saved counter value from scratch
- * buffer to CP_ME_NRT_DATA to trigger the write out to query
- * result buffer
- *
- * Straightforward, right?
- *
- * Maybe could swap the order of things in the scratch buffer to
- * put address first, and copy back to CP_ME_NRT_ADDR+DATA in one
- * shot, but that's really just polishing a turd..
- */
-
- fd_wfi(batch, ring);
-
- /* copy sample counter _LO and _HI to scratch: */
- OUT_PKT3(ring, CP_REG_TO_MEM, 2);
- OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
- CP_REG_TO_MEM_0_64B |
- CP_REG_TO_MEM_0_CNT(2)); /* write 2 regs to mem */
- OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
-
- /* ok... here we really *would* like to use the CP_SET_CONSTANT
- * mode which can add a constant to value in reg2 and write to
- * reg1... *but* that only works for banked/context registers,
- * and CP_ME_NRT_DATA isn't one of those.. so we need to do some
- * CP math to the scratch buffer instead:
- *
- * (note first 8 bytes are counter value, use offset 0x8 for
- * address calculation)
- */
-
- /* per-sample offset to scratch bo: */
- OUT_PKT3(ring, CP_MEM_WRITE, 2);
- OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
- OUT_RING(ring, samp->offset);
-
- /* now add to that the per-tile base: */
- OUT_PKT3(ring, CP_REG_TO_MEM, 2);
- OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
- CP_REG_TO_MEM_0_ACCUMULATE |
- CP_REG_TO_MEM_0_CNT(0)); /* readback 1 reg */
- OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
-
- /* now copy that back to CP_ME_NRT_ADDR: */
- OUT_PKT3(ring, CP_MEM_TO_REG, 2);
- OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR);
- OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
-
- /* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA
- * to trigger the write to result buffer
- */
- OUT_PKT3(ring, CP_MEM_TO_REG, 2);
- OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
- OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
-
- /* and again to get the value of the _HI reg from scratch: */
- OUT_PKT3(ring, CP_MEM_TO_REG, 2);
- OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
- OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0);
-
- /* Sigh.. */
-
- return samp;
+ struct fd_hw_sample *samp = fd_hw_sample_init(batch, sizeof(uint64_t));
+
+ /* use unused part of vsc_size_mem as scratch space, to avoid
+ * extra allocation:
+ */
+ struct fd_bo *scratch_bo = fd4_context(batch->ctx)->vsc_size_mem;
+ const int sample_off = 128;
+ const int addr_off = sample_off + 8;
+
+ debug_assert(batch->ctx->screen->max_freq > 0);
+
+ /* Basic issue is that we need to read counter value to a relative
+ * destination (with per-tile offset) rather than absolute dest
+ * addr. But there is no pm4 packet that can do that. This is
+ * where it would be *really* nice if we could write our own fw
+ * since afaict implementing the sort of packet we need would be
+ * trivial.
+ *
+ * Instead, we:
+ * (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer
+ * (2) CP_MEM_WRITE to write per-sample offset to scratch buffer
+ * (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base
+ * address to the per-sample offset in the scratch buffer
+ * (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3
+ * to CP_ME_NRT_ADDR
+ * (5) CP_MEM_TO_REG's to copy saved counter value from scratch
+ * buffer to CP_ME_NRT_DATA to trigger the write out to query
+ * result buffer
+ *
+ * Straightforward, right?
+ *
+ * Maybe could swap the order of things in the scratch buffer to
+ * put address first, and copy back to CP_ME_NRT_ADDR+DATA in one
+ * shot, but that's really just polishing a turd..
+ */
+
+ fd_wfi(batch, ring);
+
+ /* copy sample counter _LO and _HI to scratch: */
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
+ CP_REG_TO_MEM_0_64B |
+ CP_REG_TO_MEM_0_CNT(2)); /* write 2 regs to mem */
+ OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
+
+ /* ok... here we really *would* like to use the CP_SET_CONSTANT
+ * mode which can add a constant to value in reg2 and write to
+ * reg1... *but* that only works for banked/context registers,
+ * and CP_ME_NRT_DATA isn't one of those.. so we need to do some
+ * CP math to the scratch buffer instead:
+ *
+ * (note first 8 bytes are counter value, use offset 0x8 for
+ * address calculation)
+ */
+
+ /* per-sample offset to scratch bo: */
+ OUT_PKT3(ring, CP_MEM_WRITE, 2);
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+ OUT_RING(ring, samp->offset);
+
+ /* now add to that the per-tile base: */
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
+ CP_REG_TO_MEM_0_ACCUMULATE |
+ CP_REG_TO_MEM_0_CNT(0)); /* readback 1 reg */
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+
+ /* now copy that back to CP_ME_NRT_ADDR: */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR);
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+
+ /* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA
+ * to trigger the write to result buffer
+ */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
+ OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
+
+ /* and again to get the value of the _HI reg from scratch: */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
+ OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0);
+
+ /* Sigh.. */
+
+ return samp;
}
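/* The scratch area the code above carves out of vsc_size_mem, written as
 * a struct purely for illustration (the driver just uses the raw offsets
 * sample_off = 128 and addr_off = 136):
 */
struct time_elapsed_scratch {
   uint64_t counter; /* step (1): RBBM_PERFCTR_CP_0 LO/HI snapshot     */
   uint32_t addr;    /* steps (2)+(3): samp->offset plus per-tile base */
};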
static void
-time_elapsed_accumulate_result(struct fd_context *ctx,
- const void *start, const void *end,
- union pipe_query_result *result)
+time_elapsed_accumulate_result(struct fd_context *ctx, const void *start,
+ const void *end, union pipe_query_result *result)
{
- uint64_t n = *(uint64_t *)end - *(uint64_t *)start;
- /* max_freq is in Hz, convert cycle count to ns: */
- result->u64 += n * 1000000000 / ctx->screen->max_freq;
+ uint64_t n = *(uint64_t *)end - *(uint64_t *)start;
+ /* max_freq is in Hz, convert cycle count to ns: */
+ result->u64 += n * 1000000000 / ctx->screen->max_freq;
}
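/* A quick numeric check of the conversion above, with a made-up clock
 * (the real max_freq is queried elsewhere in the driver):
 *
 *   max_freq = 500000000 Hz (500 MHz)  =>  one cycle = 2 ns
 *   n = 1234 cycles  =>  1234 * 1000000000 / 500000000 = 2468 ns
 */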
static void
-timestamp_accumulate_result(struct fd_context *ctx,
- const void *start, const void *end,
- union pipe_query_result *result)
+timestamp_accumulate_result(struct fd_context *ctx, const void *start,
+ const void *end, union pipe_query_result *result)
{
- /* just return the value from first tile: */
- if (result->u64 != 0)
- return;
- uint64_t n = *(uint64_t *)start;
- /* max_freq is in Hz, convert cycle count to ns: */
- result->u64 = n * 1000000000 / ctx->screen->max_freq;
+ /* just return the value from first tile: */
+ if (result->u64 != 0)
+ return;
+ uint64_t n = *(uint64_t *)start;
+ /* max_freq is in Hz, convert cycle count to ns: */
+ result->u64 = n * 1000000000 / ctx->screen->max_freq;
}
static const struct fd_hw_sample_provider occlusion_counter = {
- .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
- .get_sample = occlusion_get_sample,
- .accumulate_result = occlusion_counter_accumulate_result,
+ .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_counter_accumulate_result,
};
static const struct fd_hw_sample_provider occlusion_predicate = {
- .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
- .get_sample = occlusion_get_sample,
- .accumulate_result = occlusion_predicate_accumulate_result,
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_predicate_accumulate_result,
};
static const struct fd_hw_sample_provider occlusion_predicate_conservative = {
- .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
- .get_sample = occlusion_get_sample,
- .accumulate_result = occlusion_predicate_accumulate_result,
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_predicate_accumulate_result,
};
static const struct fd_hw_sample_provider time_elapsed = {
- .query_type = PIPE_QUERY_TIME_ELAPSED,
- .always = true,
- .enable = time_elapsed_enable,
- .get_sample = time_elapsed_get_sample,
- .accumulate_result = time_elapsed_accumulate_result,
+ .query_type = PIPE_QUERY_TIME_ELAPSED,
+ .always = true,
+ .enable = time_elapsed_enable,
+ .get_sample = time_elapsed_get_sample,
+ .accumulate_result = time_elapsed_accumulate_result,
};
/* NOTE: timestamp query isn't going to give terribly sensible results
* kind of good enough.
*/
static const struct fd_hw_sample_provider timestamp = {
- .query_type = PIPE_QUERY_TIMESTAMP,
- .always = true,
- .enable = time_elapsed_enable,
- .get_sample = time_elapsed_get_sample,
- .accumulate_result = timestamp_accumulate_result,
+ .query_type = PIPE_QUERY_TIMESTAMP,
+ .always = true,
+ .enable = time_elapsed_enable,
+ .get_sample = time_elapsed_get_sample,
+ .accumulate_result = timestamp_accumulate_result,
};
-void fd4_query_context_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+void
+fd4_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
-
- ctx->create_query = fd_hw_create_query;
- ctx->query_prepare = fd_hw_query_prepare;
- ctx->query_prepare_tile = fd_hw_query_prepare_tile;
- ctx->query_update_batch = fd_hw_query_update_batch;
-
- fd_hw_query_register_provider(pctx, &occlusion_counter);
- fd_hw_query_register_provider(pctx, &occlusion_predicate);
- fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
- fd_hw_query_register_provider(pctx, &time_elapsed);
- fd_hw_query_register_provider(pctx, &timestamp);
+ struct fd_context *ctx = fd_context(pctx);
+
+ ctx->create_query = fd_hw_create_query;
+ ctx->query_prepare = fd_hw_query_prepare;
+ ctx->query_prepare_tile = fd_hw_query_prepare_tile;
+ ctx->query_update_batch = fd_hw_query_update_batch;
+
+ fd_hw_query_register_provider(pctx, &occlusion_counter);
+ fd_hw_query_register_provider(pctx, &occlusion_predicate);
+ fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
+ fd_hw_query_register_provider(pctx, &time_elapsed);
+ fd_hw_query_register_provider(pctx, &timestamp);
}
* Rob Clark <robclark@freedesktop.org>
*/
-
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd4_rasterizer.h"
#include "fd4_context.h"
#include "fd4_format.h"
+#include "fd4_rasterizer.h"
void *
fd4_rasterizer_state_create(struct pipe_context *pctx,
- const struct pipe_rasterizer_state *cso)
+ const struct pipe_rasterizer_state *cso)
{
- struct fd4_rasterizer_stateobj *so;
- float psize_min, psize_max;
+ struct fd4_rasterizer_stateobj *so;
+ float psize_min, psize_max;
- so = CALLOC_STRUCT(fd4_rasterizer_stateobj);
- if (!so)
- return NULL;
+ so = CALLOC_STRUCT(fd4_rasterizer_stateobj);
+ if (!so)
+ return NULL;
- so->base = *cso;
+ so->base = *cso;
- if (cso->point_size_per_vertex) {
- psize_min = util_get_min_point_size(cso);
- psize_max = 4092;
- } else {
- /* Force the point size to be as if the vertex output was disabled. */
- psize_min = cso->point_size;
- psize_max = cso->point_size;
- }
+ if (cso->point_size_per_vertex) {
+ psize_min = util_get_min_point_size(cso);
+ psize_max = 4092;
+ } else {
+ /* Force the point size to be as if the vertex output was disabled. */
+ psize_min = cso->point_size;
+ psize_max = cso->point_size;
+ }
-/*
- if (cso->line_stipple_enable) {
- ??? TODO line stipple
- }
- TODO cso->half_pixel_center
- if (cso->multisample)
- TODO
-*/
- so->gras_cl_clip_cntl = 0x80000; /* ??? */
- so->gras_su_point_minmax =
- A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
- A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
- so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size);
- so->gras_su_poly_offset_scale =
- A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
- so->gras_su_poly_offset_offset =
- A4XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
- so->gras_su_poly_offset_clamp =
- A4XX_GRAS_SU_POLY_OFFSET_CLAMP(cso->offset_clamp);
+ /*
+ if (cso->line_stipple_enable) {
+ ??? TODO line stipple
+ }
+ TODO cso->half_pixel_center
+ if (cso->multisample)
+ TODO
+ */
+ so->gras_cl_clip_cntl = 0x80000; /* ??? */
+ so->gras_su_point_minmax = A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
+ A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
+ so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size);
+ so->gras_su_poly_offset_scale =
+ A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
+ so->gras_su_poly_offset_offset =
+ A4XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
+ so->gras_su_poly_offset_clamp =
+ A4XX_GRAS_SU_POLY_OFFSET_CLAMP(cso->offset_clamp);
- so->gras_su_mode_control =
- A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
- so->pc_prim_vtx_cntl2 =
- A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
- A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+ so->gras_su_mode_control =
+ A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width / 2.0);
+ so->pc_prim_vtx_cntl2 = A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(
+ fd_polygon_mode(cso->fill_front)) |
+ A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(
+ fd_polygon_mode(cso->fill_back));
- if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
- cso->fill_back != PIPE_POLYGON_MODE_FILL)
- so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
+ if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
+ cso->fill_back != PIPE_POLYGON_MODE_FILL)
+ so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
- if (cso->cull_face & PIPE_FACE_FRONT)
- so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
- if (cso->cull_face & PIPE_FACE_BACK)
- so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
- if (!cso->front_ccw)
- so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
- if (!cso->flatshade_first)
- so->pc_prim_vtx_cntl |= A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
+ if (cso->cull_face & PIPE_FACE_FRONT)
+ so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
+ if (cso->cull_face & PIPE_FACE_BACK)
+ so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
+ if (!cso->front_ccw)
+ so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
+ if (!cso->flatshade_first)
+ so->pc_prim_vtx_cntl |= A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
- if (cso->offset_tri)
- so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
+ if (cso->offset_tri)
+ so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
- if (!cso->depth_clip_near)
- so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
- A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
- if (cso->clip_halfz)
- so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
+ if (!cso->depth_clip_near)
+ so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
+ A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
+ if (cso->clip_halfz)
+ so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
- return so;
+ return so;
}
#ifndef FD4_RASTERIZER_H_
#define FD4_RASTERIZER_H_
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
struct fd4_rasterizer_stateobj {
- struct pipe_rasterizer_state base;
- uint32_t gras_su_point_minmax;
- uint32_t gras_su_point_size;
- uint32_t gras_su_poly_offset_scale;
- uint32_t gras_su_poly_offset_offset;
- uint32_t gras_su_poly_offset_clamp;
+ struct pipe_rasterizer_state base;
+ uint32_t gras_su_point_minmax;
+ uint32_t gras_su_point_size;
+ uint32_t gras_su_poly_offset_scale;
+ uint32_t gras_su_poly_offset_offset;
+ uint32_t gras_su_poly_offset_clamp;
- uint32_t gras_su_mode_control;
- uint32_t gras_cl_clip_cntl;
- uint32_t pc_prim_vtx_cntl;
- uint32_t pc_prim_vtx_cntl2;
+ uint32_t gras_su_mode_control;
+ uint32_t gras_cl_clip_cntl;
+ uint32_t pc_prim_vtx_cntl;
+ uint32_t pc_prim_vtx_cntl2;
};
static inline struct fd4_rasterizer_stateobj *
fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
- return (struct fd4_rasterizer_stateobj *)rast;
+ return (struct fd4_rasterizer_stateobj *)rast;
}
-void * fd4_rasterizer_state_create(struct pipe_context *pctx,
- const struct pipe_rasterizer_state *cso);
+void *fd4_rasterizer_state_create(struct pipe_context *pctx,
+ const struct pipe_rasterizer_state *cso);
#endif /* FD4_RASTERIZER_H_ */
uint32_t
fd4_setup_slices(struct fd_resource *rsc)
{
- struct pipe_resource *prsc = &rsc->b.b;
- enum pipe_format format = prsc->format;
- uint32_t level, size = 0;
- uint32_t width = prsc->width0;
- uint32_t height = prsc->height0;
- uint32_t depth = prsc->depth0;
- /* in layer_first layout, the level (slice) contains just one
- * layer (since in fact the layer contains the slices)
- */
- uint32_t layers_in_level, alignment;
+ struct pipe_resource *prsc = &rsc->b.b;
+ enum pipe_format format = prsc->format;
+ uint32_t level, size = 0;
+ uint32_t width = prsc->width0;
+ uint32_t height = prsc->height0;
+ uint32_t depth = prsc->depth0;
+ /* in layer_first layout, the level (slice) contains just one
+ * layer (since in fact the layer contains the slices)
+ */
+ uint32_t layers_in_level, alignment;
- if (prsc->target == PIPE_TEXTURE_3D) {
- rsc->layout.layer_first = false;
- layers_in_level = prsc->array_size;
- alignment = 4096;
- } else {
- rsc->layout.layer_first = true;
- layers_in_level = 1;
- alignment = 1;
- }
+ if (prsc->target == PIPE_TEXTURE_3D) {
+ rsc->layout.layer_first = false;
+ layers_in_level = prsc->array_size;
+ alignment = 4096;
+ } else {
+ rsc->layout.layer_first = true;
+ layers_in_level = 1;
+ alignment = 1;
+ }
- /* 32 pixel alignment */
- fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
+ /* 32 pixel alignment */
+ fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
- for (level = 0; level <= prsc->last_level; level++) {
- struct fdl_slice *slice = fd_resource_slice(rsc, level);
- uint32_t pitch = fdl_pitch(&rsc->layout, level);
- uint32_t nblocksy = util_format_get_nblocksy(format, height);
+ for (level = 0; level <= prsc->last_level; level++) {
+ struct fdl_slice *slice = fd_resource_slice(rsc, level);
+ uint32_t pitch = fdl_pitch(&rsc->layout, level);
+ uint32_t nblocksy = util_format_get_nblocksy(format, height);
- slice->offset = size;
+ slice->offset = size;
- /* 3d textures can have different layer sizes for high levels, but the
- * hw auto-sizer is buggy (or at least different than what this code
- * does), so as soon as the layer size gets into range, we stop
- * reducing it.
- */
- if (prsc->target == PIPE_TEXTURE_3D &&
- (level > 1 && fd_resource_slice(rsc, level - 1)->size0 <= 0xf000))
- slice->size0 = fd_resource_slice(rsc, level - 1)->size0;
- else
- slice->size0 = align(nblocksy * pitch, alignment);
+ /* 3d textures can have different layer sizes for high levels, but the
+ * hw auto-sizer is buggy (or at least different than what this code
+ * does), so as soon as the layer size gets into range, we stop
+ * reducing it.
+ */
+ if (prsc->target == PIPE_TEXTURE_3D &&
+ (level > 1 && fd_resource_slice(rsc, level - 1)->size0 <= 0xf000))
+ slice->size0 = fd_resource_slice(rsc, level - 1)->size0;
+ else
+ slice->size0 = align(nblocksy * pitch, alignment);
- size += slice->size0 * depth * layers_in_level;
+ size += slice->size0 * depth * layers_in_level;
- width = u_minify(width, 1);
- height = u_minify(height, 1);
- depth = u_minify(depth, 1);
- }
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
+ }
- return size;
+ return size;
}
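/* A standalone re-derivation (illustration only) of the level sizes
 * fd4_setup_slices() would compute for a hypothetical 100x100 RGBA8 2D
 * texture: cpp = 4, so the pitch alignment set above is 32 pixels = 128
 * bytes, and alignment = 1 since the target is not PIPE_TEXTURE_3D.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
align_to(uint32_t v, uint32_t a)
{
   return (v + a - 1) & ~(a - 1);
}

int main(void)
{
   uint32_t cpp = 4, width = 100, height = 100, size = 0;
   for (int level = 0; level <= 3; level++) {
      uint32_t pitch = align_to(width * cpp, 32 * cpp); /* 32-pixel align */
      uint32_t size0 = pitch * height; /* align(nblocksy * pitch, 1) */
      printf("level %d: offset=%u pitch=%u size0=%u\n", level, size, pitch,
             size0);
      size += size0;
      width = width > 1 ? width / 2 : 1; /* u_minify() */
      height = height > 1 ? height / 2 : 1;
   }
   return 0;
}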
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
-#include "fd4_screen.h"
#include "fd4_context.h"
#include "fd4_emit.h"
#include "fd4_format.h"
#include "fd4_resource.h"
+#include "fd4_screen.h"
#include "ir3/ir3_compiler.h"
static bool
fd4_screen_is_format_supported(struct pipe_screen *pscreen,
- enum pipe_format format,
- enum pipe_texture_target target,
- unsigned sample_count,
- unsigned storage_sample_count,
- unsigned usage)
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned storage_sample_count, unsigned usage)
{
- unsigned retval = 0;
+ unsigned retval = 0;
- if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
- (sample_count > 1)) { /* TODO add MSAA */
- DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
- util_format_name(format), target, sample_count, usage);
- return false;
- }
+ if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+ (sample_count > 1)) { /* TODO add MSAA */
+ DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+ util_format_name(format), target, sample_count, usage);
+ return false;
+ }
- if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
- return false;
+ if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
+ return false;
- if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
- (fd4_pipe2vtx(format) != VFMT4_NONE)) {
- retval |= PIPE_BIND_VERTEX_BUFFER;
- }
+ if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
+ (fd4_pipe2vtx(format) != VFMT4_NONE)) {
+ retval |= PIPE_BIND_VERTEX_BUFFER;
+ }
- if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
- (fd4_pipe2tex(format) != TFMT4_NONE) &&
- (target == PIPE_BUFFER ||
- util_format_get_blocksize(format) != 12)) {
- retval |= PIPE_BIND_SAMPLER_VIEW;
- }
+ if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+ (fd4_pipe2tex(format) != TFMT4_NONE) &&
+ (target == PIPE_BUFFER || util_format_get_blocksize(format) != 12)) {
+ retval |= PIPE_BIND_SAMPLER_VIEW;
+ }
- if ((usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) &&
- (fd4_pipe2color(format) != RB4_NONE) &&
- (fd4_pipe2tex(format) != TFMT4_NONE)) {
- retval |= usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED);
- }
+ if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) &&
+ (fd4_pipe2color(format) != RB4_NONE) &&
+ (fd4_pipe2tex(format) != TFMT4_NONE)) {
+ retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT | PIPE_BIND_SHARED);
+ }
- /* For ARB_framebuffer_no_attachments: */
- if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
- retval |= usage & PIPE_BIND_RENDER_TARGET;
- }
+ /* For ARB_framebuffer_no_attachments: */
+ if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
+ retval |= usage & PIPE_BIND_RENDER_TARGET;
+ }
- if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
- (fd4_pipe2depth(format) != (enum a4xx_depth_format)~0) &&
- (fd4_pipe2tex(format) != TFMT4_NONE)) {
- retval |= PIPE_BIND_DEPTH_STENCIL;
- }
+ if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+ (fd4_pipe2depth(format) != (enum a4xx_depth_format) ~0) &&
+ (fd4_pipe2tex(format) != TFMT4_NONE)) {
+ retval |= PIPE_BIND_DEPTH_STENCIL;
+ }
- if ((usage & PIPE_BIND_INDEX_BUFFER) &&
- (fd_pipe2index(format) != (enum pc_di_index_size)~0)) {
- retval |= PIPE_BIND_INDEX_BUFFER;
- }
+ if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+ (fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
+ retval |= PIPE_BIND_INDEX_BUFFER;
+ }
- if (retval != usage) {
- DBG("not supported: format=%s, target=%d, sample_count=%d, "
- "usage=%x, retval=%x", util_format_name(format),
- target, sample_count, usage, retval);
- }
+ if (retval != usage) {
+ DBG("not supported: format=%s, target=%d, sample_count=%d, "
+ "usage=%x, retval=%x",
+ util_format_name(format), target, sample_count, usage, retval);
+ }
- return retval == usage;
+ return retval == usage;
}
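/* The pattern above accumulates each *supported* bind flag into retval and
 * succeeds only when every requested flag was matched.  Worked example
 * (hypothetical format): a request for SAMPLER_VIEW | RENDER_TARGET on a
 * format with a TFMT4 entry but no RB4 entry yields retval = SAMPLER_VIEW,
 * so retval != usage and the query correctly reports unsupported.
 */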
void
fd4_screen_init(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
- screen->max_rts = A4XX_MAX_RENDER_TARGETS;
- screen->setup_slices = fd4_setup_slices;
- pscreen->context_create = fd4_context_create;
- pscreen->is_format_supported = fd4_screen_is_format_supported;
- fd4_emit_init_screen(pscreen);
- ir3_screen_init(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
+ screen->max_rts = A4XX_MAX_RENDER_TARGETS;
+ screen->setup_slices = fd4_setup_slices;
+ pscreen->context_create = fd4_context_create;
+ pscreen->is_format_supported = fd4_screen_is_format_supported;
+ fd4_emit_init_screen(pscreen);
+ ir3_screen_init(pscreen);
}
*/
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd4_texture.h"
#include "fd4_format.h"
+#include "fd4_texture.h"
static enum a4xx_tex_clamp
tex_clamp(unsigned wrap, bool *needs_border)
{
- switch (wrap) {
- case PIPE_TEX_WRAP_REPEAT:
- return A4XX_TEX_REPEAT;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return A4XX_TEX_CLAMP_TO_EDGE;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- *needs_border = true;
- return A4XX_TEX_CLAMP_TO_BORDER;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- /* only works for PoT.. need to emulate otherwise! */
- return A4XX_TEX_MIRROR_CLAMP;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- return A4XX_TEX_MIRROR_REPEAT;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- /* these two we could perhaps emulate, but we currently
- * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
- */
- default:
- DBG("invalid wrap: %u", wrap);
- return 0;
- }
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return A4XX_TEX_REPEAT;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return A4XX_TEX_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ *needs_border = true;
+ return A4XX_TEX_CLAMP_TO_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ /* only works for PoT.. need to emulate otherwise! */
+ return A4XX_TEX_MIRROR_CLAMP;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return A4XX_TEX_MIRROR_REPEAT;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ /* these two we could perhaps emulate, but we currently
+ * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
+ */
+ default:
+ DBG("invalid wrap: %u", wrap);
+ return 0;
+ }
}
static enum a4xx_tex_filter
tex_filter(unsigned filter, bool aniso)
{
- switch (filter) {
- case PIPE_TEX_FILTER_NEAREST:
- return A4XX_TEX_NEAREST;
- case PIPE_TEX_FILTER_LINEAR:
- return aniso ? A4XX_TEX_ANISO : A4XX_TEX_LINEAR;
- default:
- DBG("invalid filter: %u", filter);
- return 0;
- }
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ return A4XX_TEX_NEAREST;
+ case PIPE_TEX_FILTER_LINEAR:
+ return aniso ? A4XX_TEX_ANISO : A4XX_TEX_LINEAR;
+ default:
+ DBG("invalid filter: %u", filter);
+ return 0;
+ }
}
static void *
fd4_sampler_state_create(struct pipe_context *pctx,
- const struct pipe_sampler_state *cso)
+ const struct pipe_sampler_state *cso)
{
- struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
- unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
- bool miplinear = false;
-
- if (!so)
- return NULL;
-
- if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
- miplinear = true;
-
- so->base = *cso;
-
- so->needs_border = false;
- so->texsamp0 =
- COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
- A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
- A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
- A4XX_TEX_SAMP_0_ANISO(aniso) |
- A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
- A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
- A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
-
- so->texsamp1 =
-// COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
- COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
- COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
-
- if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
- so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
- so->texsamp1 |=
- A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
- A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
- }
-
- if (cso->compare_mode)
- so->texsamp1 |= A4XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
-
- return so;
+ struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
+ unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
+ bool miplinear = false;
+
+ if (!so)
+ return NULL;
+
+ if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
+ miplinear = true;
+
+ so->base = *cso;
+
+ so->needs_border = false;
+ so->texsamp0 =
+ COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
+ A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
+ A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
+ A4XX_TEX_SAMP_0_ANISO(aniso) |
+ A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
+ A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
+ A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
+
+ so->texsamp1 =
+ // COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
+ COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
+ COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
+
+ if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
+ so->texsamp1 |= A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
+ A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
+ }
+
+ if (cso->compare_mode)
+ so->texsamp1 |=
+ A4XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
+
+ return so;
}
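
An aside on the aniso math above: util_last_bit(MIN2(cso->max_anisotropy >> 1, 8)) encodes the power-of-two anisotropy cap as a log2-style level, 1x -> 0 up through 16x -> 4. A minimal standalone sketch of the same arithmetic (last_bit here is just a stand-in for Mesa's util_last_bit; everything else is made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* stand-in for util_last_bit(): 1-based index of the MSB, 0 for x == 0 */
static unsigned
last_bit(uint32_t x)
{
   unsigned n = 0;
   while (x) {
      n++;
      x >>= 1;
   }
   return n;
}

int
main(void)
{
   for (unsigned max_aniso = 1; max_aniso <= 16; max_aniso *= 2) {
      unsigned capped = (max_aniso >> 1 < 8) ? max_aniso >> 1 : 8;
      printf("max_anisotropy=%2u -> ANISO=%u\n", max_aniso, last_bit(capped));
   }
   return 0; /* 1->0, 2->1, 4->2, 8->3, 16->4 */
}
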
-
static enum a4xx_tex_type
tex_type(unsigned target)
{
- switch (target) {
- default:
- assert(0);
- case PIPE_BUFFER:
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return A4XX_TEX_1D;
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_2D_ARRAY:
- return A4XX_TEX_2D;
- case PIPE_TEXTURE_3D:
- return A4XX_TEX_3D;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return A4XX_TEX_CUBE;
- }
+ switch (target) {
+ default:
+ assert(0);
+ case PIPE_BUFFER:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return A4XX_TEX_1D;
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return A4XX_TEX_2D;
+ case PIPE_TEXTURE_3D:
+ return A4XX_TEX_3D;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return A4XX_TEX_CUBE;
+ }
}
static bool
use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format)
{
- return (fd_screen(pctx->screen)->gpu_id == 420) &&
- (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC);
+ return (fd_screen(pctx->screen)->gpu_id == 420) &&
+ (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC);
}
static struct pipe_sampler_view *
fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
- const struct pipe_sampler_view *cso)
+ const struct pipe_sampler_view *cso)
{
- struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
- struct fd_resource *rsc = fd_resource(prsc);
- enum pipe_format format = cso->format;
- unsigned lvl, layers = 0;
-
- if (!so)
- return NULL;
-
- if (format == PIPE_FORMAT_X32_S8X24_UINT) {
- rsc = rsc->stencil;
- format = rsc->b.b.format;
- }
-
- so->base = *cso;
- pipe_reference(NULL, &prsc->reference);
- so->base.texture = prsc;
- so->base.reference.count = 1;
- so->base.context = pctx;
-
- so->texconst0 =
- A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) |
- A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
- fd4_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
- cso->swizzle_b, cso->swizzle_a);
-
- if (util_format_is_srgb(format)) {
- if (use_astc_srgb_workaround(pctx, format))
- so->astc_srgb = true;
- so->texconst0 |= A4XX_TEX_CONST_0_SRGB;
- }
-
- if (cso->target == PIPE_BUFFER) {
- unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
-
- lvl = 0;
- so->texconst1 =
- A4XX_TEX_CONST_1_WIDTH(elements) |
- A4XX_TEX_CONST_1_HEIGHT(1);
- so->texconst2 =
- A4XX_TEX_CONST_2_PITCH(elements * rsc->layout.cpp);
- so->offset = cso->u.buf.offset;
- } else {
- unsigned miplevels;
-
- lvl = fd_sampler_first_level(cso);
- miplevels = fd_sampler_last_level(cso) - lvl;
- layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
-
- so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels);
- so->texconst1 =
- A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
- A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
- so->texconst2 =
- A4XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 5) |
- A4XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
- so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
- }
-
- /* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle
- * we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful
- * way to re-arrange things so stencil component is where the swiz
- * expects.
- *
- * Note that gallium expects stencil sampler to return (s,s,s,s)
- * which isn't quite true. To make that happen we'd have to massage
- * the swizzle. But in practice only the .x component is used.
- */
- if (format == PIPE_FORMAT_X24S8_UINT)
- so->texconst2 |= A4XX_TEX_CONST_2_SWAP(XYZW);
-
- switch (cso->target) {
- case PIPE_TEXTURE_1D_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- so->texconst3 =
- A4XX_TEX_CONST_3_DEPTH(layers) |
- A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size);
- break;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- so->texconst3 =
- A4XX_TEX_CONST_3_DEPTH(layers / 6) |
- A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size);
- break;
- case PIPE_TEXTURE_3D:
- so->texconst3 =
- A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
- A4XX_TEX_CONST_3_LAYERSZ(fd_resource_slice(rsc, lvl)->size0);
- so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(
- fd_resource_slice(rsc, prsc->last_level)->size0);
- break;
- default:
- so->texconst3 = 0x00000000;
- break;
- }
-
- return &so->base;
+ struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
+ struct fd_resource *rsc = fd_resource(prsc);
+ enum pipe_format format = cso->format;
+ unsigned lvl, layers = 0;
+
+ if (!so)
+ return NULL;
+
+ if (format == PIPE_FORMAT_X32_S8X24_UINT) {
+ rsc = rsc->stencil;
+ format = rsc->b.b.format;
+ }
+
+ so->base = *cso;
+ pipe_reference(NULL, &prsc->reference);
+ so->base.texture = prsc;
+ so->base.reference.count = 1;
+ so->base.context = pctx;
+
+ so->texconst0 = A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) |
+ A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
+ fd4_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
+ cso->swizzle_b, cso->swizzle_a);
+
+ if (util_format_is_srgb(format)) {
+ if (use_astc_srgb_workaround(pctx, format))
+ so->astc_srgb = true;
+ so->texconst0 |= A4XX_TEX_CONST_0_SRGB;
+ }
+
+ if (cso->target == PIPE_BUFFER) {
+ unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
+
+ lvl = 0;
+ so->texconst1 =
+ A4XX_TEX_CONST_1_WIDTH(elements) | A4XX_TEX_CONST_1_HEIGHT(1);
+ so->texconst2 = A4XX_TEX_CONST_2_PITCH(elements * rsc->layout.cpp);
+ so->offset = cso->u.buf.offset;
+ } else {
+ unsigned miplevels;
+
+ lvl = fd_sampler_first_level(cso);
+ miplevels = fd_sampler_last_level(cso) - lvl;
+ layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
+
+ so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels);
+ so->texconst1 = A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+ A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+ so->texconst2 = A4XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 5) |
+ A4XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
+ so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
+ }
+
+ /* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle
+ * we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful
+ * way to re-arrange things so stencil component is where the swiz
+ * expects.
+ *
+ * Note that gallium expects stencil sampler to return (s,s,s,s)
+ * which isn't quite true. To make that happen we'd have to massage
+ * the swizzle. But in practice only the .x component is used.
+ */
+ if (format == PIPE_FORMAT_X24S8_UINT)
+ so->texconst2 |= A4XX_TEX_CONST_2_SWAP(XYZW);
+
+ switch (cso->target) {
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ so->texconst3 = A4XX_TEX_CONST_3_DEPTH(layers) |
+ A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size);
+ break;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ so->texconst3 = A4XX_TEX_CONST_3_DEPTH(layers / 6) |
+ A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size);
+ break;
+ case PIPE_TEXTURE_3D:
+ so->texconst3 =
+ A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
+ A4XX_TEX_CONST_3_LAYERSZ(fd_resource_slice(rsc, lvl)->size0);
+ so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(
+ fd_resource_slice(rsc, prsc->last_level)->size0);
+ break;
+ default:
+ so->texconst3 = 0x00000000;
+ break;
+ }
+
+ return &so->base;
}
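
To make the SWAP(XYZW) note above concrete: sampling the packed z24s8 dword as 8888_UINT (little endian, stencil assumed in the top byte per gallium's Z24_UNORM_S8_UINT layout) puts stencil in .w, and reversing the component order moves it into .x, the one component stencil-sampling consumers actually read. An illustrative sketch, not driver code:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   uint32_t texel = (0xABu << 24) | 0x123456; /* stencil=0xAB, depth=0x123456 */
   uint8_t c[4];                              /* components of an 8888_UINT fetch */
   for (int i = 0; i < 4; i++)
      c[i] = (texel >> (8 * i)) & 0xff;
   printf("no swap:   x=%02x w=%02x\n", c[0], c[3]); /* stencil lands in .w */
   printf("XYZW swap: x=%02x w=%02x\n", c[3], c[0]); /* stencil lands in .x */
   return 0;
}
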
static void
fd4_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
- unsigned start, unsigned nr, unsigned unbind_num_trailing_slots,
- struct pipe_sampler_view **views)
+ unsigned start, unsigned nr,
+ unsigned unbind_num_trailing_slots,
+ struct pipe_sampler_view **views)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd4_context *fd4_ctx = fd4_context(ctx);
- uint16_t astc_srgb = 0;
- unsigned i;
-
- for (i = 0; i < nr; i++) {
- if (views[i]) {
- struct fd4_pipe_sampler_view *view =
- fd4_pipe_sampler_view(views[i]);
- if (view->astc_srgb)
- astc_srgb |= (1 << i);
- }
- }
-
- fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots, views);
-
- if (shader == PIPE_SHADER_FRAGMENT) {
- fd4_ctx->fastc_srgb = astc_srgb;
- } else if (shader == PIPE_SHADER_VERTEX) {
- fd4_ctx->vastc_srgb = astc_srgb;
- }
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ uint16_t astc_srgb = 0;
+ unsigned i;
+
+ for (i = 0; i < nr; i++) {
+ if (views[i]) {
+ struct fd4_pipe_sampler_view *view = fd4_pipe_sampler_view(views[i]);
+ if (view->astc_srgb)
+ astc_srgb |= (1 << i);
+ }
+ }
+
+ fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots,
+ views);
+
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ fd4_ctx->fastc_srgb = astc_srgb;
+ } else if (shader == PIPE_SHADER_VERTEX) {
+ fd4_ctx->vastc_srgb = astc_srgb;
+ }
}
void
fd4_texture_init(struct pipe_context *pctx)
{
- pctx->create_sampler_state = fd4_sampler_state_create;
- pctx->bind_sampler_states = fd_sampler_states_bind;
- pctx->create_sampler_view = fd4_sampler_view_create;
- pctx->set_sampler_views = fd4_set_sampler_views;
+ pctx->create_sampler_state = fd4_sampler_state_create;
+ pctx->bind_sampler_states = fd_sampler_states_bind;
+ pctx->create_sampler_view = fd4_sampler_view_create;
+ pctx->set_sampler_views = fd4_set_sampler_views;
}
#include "pipe/p_context.h"
-#include "freedreno_texture.h"
#include "freedreno_resource.h"
+#include "freedreno_texture.h"
#include "fd4_context.h"
#include "fd4_format.h"
struct fd4_sampler_stateobj {
- struct pipe_sampler_state base;
- uint32_t texsamp0, texsamp1;
- bool needs_border;
+ struct pipe_sampler_state base;
+ uint32_t texsamp0, texsamp1;
+ bool needs_border;
};
static inline struct fd4_sampler_stateobj *
fd4_sampler_stateobj(struct pipe_sampler_state *samp)
{
- return (struct fd4_sampler_stateobj *)samp;
+ return (struct fd4_sampler_stateobj *)samp;
}
struct fd4_pipe_sampler_view {
- struct pipe_sampler_view base;
- uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
- uint32_t offset;
- bool astc_srgb;
+ struct pipe_sampler_view base;
+ uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
+ uint32_t offset;
+ bool astc_srgb;
};
static inline struct fd4_pipe_sampler_view *
fd4_pipe_sampler_view(struct pipe_sampler_view *pview)
{
- return (struct fd4_pipe_sampler_view *)pview;
+ return (struct fd4_pipe_sampler_view *)pview;
}
unsigned fd4_get_const_idx(struct fd_context *ctx,
- struct fd_texture_stateobj *tex, unsigned samp_id);
+ struct fd_texture_stateobj *tex, unsigned samp_id);
void fd4_texture_init(struct pipe_context *pctx);
* Rob Clark <robclark@freedesktop.org>
*/
-
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd4_zsa.h"
#include "fd4_context.h"
#include "fd4_format.h"
+#include "fd4_zsa.h"
void *
fd4_zsa_state_create(struct pipe_context *pctx,
- const struct pipe_depth_stencil_alpha_state *cso)
+ const struct pipe_depth_stencil_alpha_state *cso)
{
- struct fd4_zsa_stateobj *so;
+ struct fd4_zsa_stateobj *so;
- so = CALLOC_STRUCT(fd4_zsa_stateobj);
- if (!so)
- return NULL;
+ so = CALLOC_STRUCT(fd4_zsa_stateobj);
+ if (!so)
+ return NULL;
- so->base = *cso;
+ so->base = *cso;
- so->rb_depth_control |=
- A4XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
+ so->rb_depth_control |=
+ A4XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
- if (cso->depth_enabled)
- so->rb_depth_control |=
- A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
- A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
+ if (cso->depth_enabled)
+ so->rb_depth_control |=
+ A4XX_RB_DEPTH_CONTROL_Z_ENABLE | A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
- if (cso->depth_writemask)
- so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
+ if (cso->depth_writemask)
+ so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
- if (cso->stencil[0].enabled) {
- const struct pipe_stencil_state *s = &cso->stencil[0];
+ if (cso->stencil[0].enabled) {
+ const struct pipe_stencil_state *s = &cso->stencil[0];
- so->rb_stencil_control |=
- A4XX_RB_STENCIL_CONTROL_STENCIL_READ |
- A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
- A4XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
- A4XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
- A4XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
- A4XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
- so->rb_stencil_control2 |=
- A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER;
- so->rb_stencilrefmask |=
- 0xff000000 | /* ??? */
- A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
- A4XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
+ so->rb_stencil_control |=
+ A4XX_RB_STENCIL_CONTROL_STENCIL_READ |
+ A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+ A4XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
+ A4XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
+ so->rb_stencil_control2 |= A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER;
+ so->rb_stencilrefmask |=
+ 0xff000000 | /* ??? */
+ A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
+ A4XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
- if (cso->stencil[1].enabled) {
- const struct pipe_stencil_state *bs = &cso->stencil[1];
+ if (cso->stencil[1].enabled) {
+ const struct pipe_stencil_state *bs = &cso->stencil[1];
- so->rb_stencil_control |=
- A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
- A4XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
- A4XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
- A4XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
- A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
- so->rb_stencilrefmask_bf |=
- 0xff000000 | /* ??? */
- A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
- A4XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
- }
- }
+ so->rb_stencil_control |=
+ A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+ A4XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
+ A4XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
+ so->rb_stencilrefmask_bf |=
+ 0xff000000 | /* ??? */
+ A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
+ A4XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
+ }
+ }
- if (cso->alpha_enabled) {
- uint32_t ref = cso->alpha_ref_value * 255.0;
- so->gras_alpha_control =
- A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE;
- so->rb_alpha_control =
- A4XX_RB_ALPHA_CONTROL_ALPHA_TEST |
- A4XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
- A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
- so->rb_depth_control |=
- A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
- }
+ if (cso->alpha_enabled) {
+ uint32_t ref = cso->alpha_ref_value * 255.0;
+ so->gras_alpha_control = A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE;
+ so->rb_alpha_control =
+ A4XX_RB_ALPHA_CONTROL_ALPHA_TEST |
+ A4XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
+ A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
+ so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+ }
- return so;
+ return so;
}
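
One subtlety in the alpha-test setup above: the float reference value is scaled by 255.0 and then truncated (not rounded) by the implicit conversion to uint32_t. A tiny sketch of just that conversion:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   const float refs[] = {0.0f, 0.25f, 0.5f, 1.0f};
   for (int i = 0; i < 4; i++) {
      uint32_t ref = refs[i] * 255.0; /* truncates: 0.5 -> 127, not 128 */
      printf("alpha_ref_value=%.2f -> ALPHA_REF=%u\n", refs[i], ref);
   }
   return 0;
}
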
#ifndef FD4_ZSA_H_
#define FD4_ZSA_H_
-
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd4_zsa_stateobj {
- struct pipe_depth_stencil_alpha_state base;
- uint32_t gras_alpha_control;
- uint32_t rb_alpha_control;
- uint32_t rb_depth_control;
- uint32_t rb_stencil_control;
- uint32_t rb_stencil_control2;
- uint32_t rb_stencilrefmask;
- uint32_t rb_stencilrefmask_bf;
+ struct pipe_depth_stencil_alpha_state base;
+ uint32_t gras_alpha_control;
+ uint32_t rb_alpha_control;
+ uint32_t rb_depth_control;
+ uint32_t rb_stencil_control;
+ uint32_t rb_stencil_control2;
+ uint32_t rb_stencilrefmask;
+ uint32_t rb_stencilrefmask_bf;
};
static inline struct fd4_zsa_stateobj *
fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
- return (struct fd4_zsa_stateobj *)zsa;
+ return (struct fd4_zsa_stateobj *)zsa;
}
-void * fd4_zsa_state_create(struct pipe_context *pctx,
- const struct pipe_depth_stencil_alpha_state *cso);
+void *fd4_zsa_state_create(struct pipe_context *pctx,
+ const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD4_ZSA_H_ */
#include "pipe/p_state.h"
#include "util/u_blend.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
#include "fd5_blend.h"
#include "fd5_context.h"
static enum a3xx_rb_blend_opcode
blend_func(unsigned func)
{
- switch (func) {
- case PIPE_BLEND_ADD:
- return BLEND_DST_PLUS_SRC;
- case PIPE_BLEND_MIN:
- return BLEND_MIN_DST_SRC;
- case PIPE_BLEND_MAX:
- return BLEND_MAX_DST_SRC;
- case PIPE_BLEND_SUBTRACT:
- return BLEND_SRC_MINUS_DST;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return BLEND_DST_MINUS_SRC;
- default:
- DBG("invalid blend func: %x", func);
- return 0;
- }
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return BLEND_DST_PLUS_SRC;
+ case PIPE_BLEND_MIN:
+ return BLEND_MIN_DST_SRC;
+ case PIPE_BLEND_MAX:
+ return BLEND_MAX_DST_SRC;
+ case PIPE_BLEND_SUBTRACT:
+ return BLEND_SRC_MINUS_DST;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return BLEND_DST_MINUS_SRC;
+ default:
+ DBG("invalid blend func: %x", func);
+ return 0;
+ }
}
void *
fd5_blend_state_create(struct pipe_context *pctx,
- const struct pipe_blend_state *cso)
+ const struct pipe_blend_state *cso)
{
- struct fd5_blend_stateobj *so;
- enum a3xx_rop_code rop = ROP_COPY;
- bool reads_dest = false;
- unsigned i, mrt_blend = 0;
-
- if (cso->logicop_enable) {
- rop = cso->logicop_func; /* maps 1:1 */
- reads_dest = util_logicop_reads_dest(cso->logicop_func);
- }
-
- so = CALLOC_STRUCT(fd5_blend_stateobj);
- if (!so)
- return NULL;
-
- so->base = *cso;
-
- so->lrz_write = true; /* unless blend enabled for any MRT */
-
- for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
- const struct pipe_rt_blend_state *rt;
-
- if (cso->independent_blend_enable)
- rt = &cso->rt[i];
- else
- rt = &cso->rt[0];
-
- so->rb_mrt[i].blend_control =
- A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
- A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
- A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
- A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
- A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
- A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
-
- so->rb_mrt[i].control =
- A5XX_RB_MRT_CONTROL_ROP_CODE(rop) |
- COND(cso->logicop_enable, A5XX_RB_MRT_CONTROL_ROP_ENABLE) |
- A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
-
- if (rt->blend_enable) {
- so->rb_mrt[i].control |=
-// A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
- A5XX_RB_MRT_CONTROL_BLEND |
- A5XX_RB_MRT_CONTROL_BLEND2;
- mrt_blend |= (1 << i);
- so->lrz_write = false;
- }
-
- if (reads_dest) {
-// so->rb_mrt[i].control |= A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
- mrt_blend |= (1 << i);
- }
-
-// if (cso->dither)
-// so->rb_mrt[i].buf_info |= A5XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
- }
-
- so->rb_blend_cntl = A5XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) |
- COND(cso->alpha_to_coverage, A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) |
- COND(cso->independent_blend_enable, A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND);
- so->sp_blend_cntl = A5XX_SP_BLEND_CNTL_UNK8 |
- COND(cso->alpha_to_coverage, A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE) |
- COND(mrt_blend, A5XX_SP_BLEND_CNTL_ENABLED);
-
- return so;
+ struct fd5_blend_stateobj *so;
+ enum a3xx_rop_code rop = ROP_COPY;
+ bool reads_dest = false;
+ unsigned i, mrt_blend = 0;
+
+ if (cso->logicop_enable) {
+ rop = cso->logicop_func; /* maps 1:1 */
+ reads_dest = util_logicop_reads_dest(cso->logicop_func);
+ }
+
+ so = CALLOC_STRUCT(fd5_blend_stateobj);
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ so->lrz_write = true; /* unless blend enabled for any MRT */
+
+ for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
+ const struct pipe_rt_blend_state *rt;
+
+ if (cso->independent_blend_enable)
+ rt = &cso->rt[i];
+ else
+ rt = &cso->rt[0];
+
+ so->rb_mrt[i].blend_control =
+ A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(
+ fd_blend_factor(rt->rgb_src_factor)) |
+ A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
+ A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(
+ fd_blend_factor(rt->rgb_dst_factor)) |
+ A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(
+ fd_blend_factor(rt->alpha_src_factor)) |
+ A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(
+ blend_func(rt->alpha_func)) |
+ A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(
+ fd_blend_factor(rt->alpha_dst_factor));
+
+ so->rb_mrt[i].control =
+ A5XX_RB_MRT_CONTROL_ROP_CODE(rop) |
+ COND(cso->logicop_enable, A5XX_RB_MRT_CONTROL_ROP_ENABLE) |
+ A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
+
+ if (rt->blend_enable) {
+ so->rb_mrt[i].control |=
+ // A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE
+ //|
+ A5XX_RB_MRT_CONTROL_BLEND | A5XX_RB_MRT_CONTROL_BLEND2;
+ mrt_blend |= (1 << i);
+ so->lrz_write = false;
+ }
+
+ if (reads_dest) {
+ // so->rb_mrt[i].control |=
+ //A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
+ mrt_blend |= (1 << i);
+ }
+
+ // if (cso->dither)
+ // so->rb_mrt[i].buf_info |=
+ //A5XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
+ }
+
+ so->rb_blend_cntl =
+ A5XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) |
+ COND(cso->alpha_to_coverage, A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) |
+ COND(cso->independent_blend_enable, A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND);
+ so->sp_blend_cntl =
+ A5XX_SP_BLEND_CNTL_UNK8 |
+ COND(cso->alpha_to_coverage, A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE) |
+ COND(mrt_blend, A5XX_SP_BLEND_CNTL_ENABLED);
+
+ return so;
}
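
Worth spelling out from the MRT loop above: lrz_write starts out true and is cleared as soon as any render target enables blending, while mrt_blend accumulates a per-RT bitmask that also picks up logicops that read the destination. A reduced sketch of that bookkeeping, with plain arrays standing in for the gallium/driver structs:

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
   bool blend_enable[4] = {false, true, false, false}; /* hypothetical MRTs */
   bool reads_dest = false;                            /* no dest-reading ROP */
   unsigned mrt_blend = 0;
   bool lrz_write = true; /* unless blend enabled for any MRT */
   for (unsigned i = 0; i < 4; i++) {
      if (blend_enable[i]) {
         mrt_blend |= (1 << i);
         lrz_write = false;
      }
      if (reads_dest)
         mrt_blend |= (1 << i);
   }
   printf("mrt_blend=0x%x lrz_write=%d\n", mrt_blend, lrz_write); /* 0x2 0 */
   return 0;
}
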
#ifndef FD5_BLEND_H_
#define FD5_BLEND_H_
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd5_blend_stateobj {
- struct pipe_blend_state base;
-
- struct {
- uint32_t control;
- uint32_t buf_info;
- uint32_t blend_control;
- } rb_mrt[A5XX_MAX_RENDER_TARGETS];
- uint32_t rb_blend_cntl;
- uint32_t sp_blend_cntl;
- bool lrz_write;
+ struct pipe_blend_state base;
+
+ struct {
+ uint32_t control;
+ uint32_t buf_info;
+ uint32_t blend_control;
+ } rb_mrt[A5XX_MAX_RENDER_TARGETS];
+ uint32_t rb_blend_cntl;
+ uint32_t sp_blend_cntl;
+ bool lrz_write;
};
static inline struct fd5_blend_stateobj *
fd5_blend_stateobj(struct pipe_blend_state *blend)
{
- return (struct fd5_blend_stateobj *)blend;
+ return (struct fd5_blend_stateobj *)blend;
}
-void * fd5_blend_state_create(struct pipe_context *pctx,
- const struct pipe_blend_state *cso);
+void *fd5_blend_state_create(struct pipe_context *pctx,
+ const struct pipe_blend_state *cso);
#endif /* FD5_BLEND_H_ */
#include "freedreno_resource.h"
#include "fd5_blitter.h"
-#include "fd5_format.h"
#include "fd5_emit.h"
+#include "fd5_format.h"
/* Make sure none of the requested dimensions extend beyond the size of the
* resource. Not entirely sure why this happens, but sometimes it does, and
static bool
ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
{
- return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
- (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
- (b->z >= 0) && (b->z + b->depth <= u_minify(r->depth0, lvl));
+ return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
+ (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
+ (b->z >= 0) && (b->z + b->depth <= u_minify(r->depth0, lvl));
}
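
The check above leans on u_minify(), which halves a dimension per mip level with a floor of 1. A self-contained version of the same test, using a made-up 256-wide level-0 texture:

#include <stdbool.h>
#include <stdio.h>

static int
minify(int v, int lvl) /* same rule as u_minify(): MAX2(v >> lvl, 1) */
{
   int m = v >> lvl;
   return m ? m : 1;
}

static bool
ok_dim(int x, int w, int dim0, int lvl)
{
   return (x >= 0) && (x + w <= minify(dim0, lvl));
}

int
main(void)
{
   /* a 100-wide box at x=30 fits level 0 (130 <= 256), not level 1 (130 > 128) */
   printf("lvl0=%d lvl1=%d\n", ok_dim(30, 100, 256, 0), ok_dim(30, 100, 256, 1));
   return 0;
}
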
/* Not sure if format restrictions differ for src and dst, or if
static bool
ok_format(enum pipe_format fmt)
{
- if (util_format_is_compressed(fmt))
- return false;
-
- switch (fmt) {
- case PIPE_FORMAT_R10G10B10A2_SSCALED:
- case PIPE_FORMAT_R10G10B10A2_SNORM:
- case PIPE_FORMAT_B10G10R10A2_USCALED:
- case PIPE_FORMAT_B10G10R10A2_SSCALED:
- case PIPE_FORMAT_B10G10R10A2_SNORM:
- case PIPE_FORMAT_R10G10B10A2_UNORM:
- case PIPE_FORMAT_R10G10B10A2_USCALED:
- case PIPE_FORMAT_B10G10R10A2_UNORM:
- case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- case PIPE_FORMAT_B10G10R10A2_UINT:
- case PIPE_FORMAT_R10G10B10A2_UINT:
- return false;
- default:
- break;
- }
-
- if (fd5_pipe2color(fmt) == RB5_NONE)
- return false;
-
- return true;
+ if (util_format_is_compressed(fmt))
+ return false;
+
+ switch (fmt) {
+ case PIPE_FORMAT_R10G10B10A2_SSCALED:
+ case PIPE_FORMAT_R10G10B10A2_SNORM:
+ case PIPE_FORMAT_B10G10R10A2_USCALED:
+ case PIPE_FORMAT_B10G10R10A2_SSCALED:
+ case PIPE_FORMAT_B10G10R10A2_SNORM:
+ case PIPE_FORMAT_R10G10B10A2_UNORM:
+ case PIPE_FORMAT_R10G10B10A2_USCALED:
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
+ case PIPE_FORMAT_B10G10R10A2_UINT:
+ case PIPE_FORMAT_R10G10B10A2_UINT:
+ return false;
+ default:
+ break;
+ }
+
+ if (fd5_pipe2color(fmt) == RB5_NONE)
+ return false;
+
+ return true;
}
static bool
can_do_blit(const struct pipe_blit_info *info)
{
- /* I think we can do scaling, but not in z dimension since that would
- * require blending..
- */
- if (info->dst.box.depth != info->src.box.depth)
- return false;
+ /* I think we can do scaling, but not in z dimension since that would
+ * require blending..
+ */
+ if (info->dst.box.depth != info->src.box.depth)
+ return false;
- if (!ok_format(info->dst.format))
- return false;
+ if (!ok_format(info->dst.format))
+ return false;
- if (!ok_format(info->src.format))
- return false;
+ if (!ok_format(info->src.format))
+ return false;
- /* hw ignores {SRC,DST}_INFO.COLOR_SWAP if {SRC,DST}_INFO.TILE_MODE
- * is set (not linear). We can kind of get around that when tiling/
- * untiling by setting both src and dst COLOR_SWAP=WZYX, but that
- * means the formats must match:
- */
- if ((fd_resource(info->dst.resource)->layout.tile_mode ||
- fd_resource(info->src.resource)->layout.tile_mode) &&
- info->dst.format != info->src.format)
- return false;
+ /* hw ignores {SRC,DST}_INFO.COLOR_SWAP if {SRC,DST}_INFO.TILE_MODE
+ * is set (not linear). We can kind of get around that when tiling/
+ * untiling by setting both src and dst COLOR_SWAP=WZYX, but that
+ * means the formats must match:
+ */
+ if ((fd_resource(info->dst.resource)->layout.tile_mode ||
+ fd_resource(info->src.resource)->layout.tile_mode) &&
+ info->dst.format != info->src.format)
+ return false;
- /* until we figure out a few more registers: */
- if ((info->dst.box.width != info->src.box.width) ||
- (info->dst.box.height != info->src.box.height))
- return false;
+ /* until we figure out a few more registers: */
+ if ((info->dst.box.width != info->src.box.width) ||
+ (info->dst.box.height != info->src.box.height))
+ return false;
- /* src box can be inverted, which we don't support.. dst box cannot: */
- if ((info->src.box.width < 0) || (info->src.box.height < 0))
- return false;
+ /* src box can be inverted, which we don't support.. dst box cannot: */
+ if ((info->src.box.width < 0) || (info->src.box.height < 0))
+ return false;
- if (!ok_dims(info->src.resource, &info->src.box, info->src.level))
- return false;
+ if (!ok_dims(info->src.resource, &info->src.box, info->src.level))
+ return false;
- if (!ok_dims(info->dst.resource, &info->dst.box, info->dst.level))
- return false;
+ if (!ok_dims(info->dst.resource, &info->dst.box, info->dst.level))
+ return false;
- debug_assert(info->dst.box.width >= 0);
- debug_assert(info->dst.box.height >= 0);
- debug_assert(info->dst.box.depth >= 0);
+ debug_assert(info->dst.box.width >= 0);
+ debug_assert(info->dst.box.height >= 0);
+ debug_assert(info->dst.box.depth >= 0);
- if ((info->dst.resource->nr_samples > 1) ||
- (info->src.resource->nr_samples > 1))
- return false;
+ if ((info->dst.resource->nr_samples > 1) ||
+ (info->src.resource->nr_samples > 1))
+ return false;
- if (info->scissor_enable)
- return false;
+ if (info->scissor_enable)
+ return false;
- if (info->window_rectangle_include)
- return false;
+ if (info->window_rectangle_include)
+ return false;
- if (info->render_condition_enable)
- return false;
+ if (info->render_condition_enable)
+ return false;
- if (info->alpha_blend)
- return false;
+ if (info->alpha_blend)
+ return false;
- if (info->filter != PIPE_TEX_FILTER_NEAREST)
- return false;
+ if (info->filter != PIPE_TEX_FILTER_NEAREST)
+ return false;
- if (info->mask != util_format_get_mask(info->src.format))
- return false;
+ if (info->mask != util_format_get_mask(info->src.format))
+ return false;
- if (info->mask != util_format_get_mask(info->dst.format))
- return false;
+ if (info->mask != util_format_get_mask(info->dst.format))
+ return false;
- return true;
+ return true;
}
static void
emit_setup(struct fd_ringbuffer *ring)
{
- OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
- OUT_RING(ring, 0x00000008);
+ OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
+ OUT_RING(ring, 0x00000008);
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_2100, 1);
- OUT_RING(ring, 0x86000000); /* UNKNOWN_2100 */
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_2100, 1);
+ OUT_RING(ring, 0x86000000); /* UNKNOWN_2100 */
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_2180, 1);
- OUT_RING(ring, 0x86000000); /* UNKNOWN_2180 */
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_2180, 1);
+ OUT_RING(ring, 0x86000000); /* UNKNOWN_2180 */
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_2184, 1);
- OUT_RING(ring, 0x00000009); /* UNKNOWN_2184 */
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_2184, 1);
+ OUT_RING(ring, 0x00000009); /* UNKNOWN_2184 */
- OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
- OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
+ OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
- OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1);
- OUT_RING(ring, 0x00000004); /* RB_MODE_CNTL */
+ OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1);
+ OUT_RING(ring, 0x00000004); /* RB_MODE_CNTL */
- OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
- OUT_RING(ring, 0x0000000c); /* SP_MODE_CNTL */
+ OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
+ OUT_RING(ring, 0x0000000c); /* SP_MODE_CNTL */
- OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
- OUT_RING(ring, 0x00000344); /* TPL1_MODE_CNTL */
+ OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
+ OUT_RING(ring, 0x00000344); /* TPL1_MODE_CNTL */
- OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1);
- OUT_RING(ring, 0x00000002); /* HLSQ_MODE_CNTL */
+ OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1);
+ OUT_RING(ring, 0x00000002); /* HLSQ_MODE_CNTL */
- OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
- OUT_RING(ring, 0x00000181); /* GRAS_CL_CNTL */
+ OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
+ OUT_RING(ring, 0x00000181); /* GRAS_CL_CNTL */
}
/* buffers need to be handled specially since x/width can exceed the bounds
static void
emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
{
- const struct pipe_box *sbox = &info->src.box;
- const struct pipe_box *dbox = &info->dst.box;
- struct fd_resource *src, *dst;
- unsigned sshift, dshift;
-
- src = fd_resource(info->src.resource);
- dst = fd_resource(info->dst.resource);
-
- debug_assert(src->layout.cpp == 1);
- debug_assert(dst->layout.cpp == 1);
- debug_assert(info->src.resource->format == info->dst.resource->format);
- debug_assert((sbox->y == 0) && (sbox->height == 1));
- debug_assert((dbox->y == 0) && (dbox->height == 1));
- debug_assert((sbox->z == 0) && (sbox->depth == 1));
- debug_assert((dbox->z == 0) && (dbox->depth == 1));
- debug_assert(sbox->width == dbox->width);
- debug_assert(info->src.level == 0);
- debug_assert(info->dst.level == 0);
-
- /*
- * Buffers can have dimensions bigger than max width, remap into
- * multiple 1d blits to fit within max dimension
- *
- * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
- * seems to prevent overfetch related faults. Not quite sure what
- * the deal is there.
- *
- * Low 6 bits of SRC/DST addresses need to be zero (ie. address
- * aligned to 64) so we need to shift src/dst x1/x2 to make up the
- * difference. On top of already splitting up the blit so width
- * isn't > 16k.
- *
- * We perhaps could do a bit better, if src and dst are aligned but
- * in the worst case this means we have to split the copy up into
- * 16k (0x4000) minus 64 (0x40).
- */
-
- sshift = sbox->x & 0x3f;
- dshift = dbox->x & 0x3f;
-
- for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
- unsigned soff, doff, w, p;
-
- soff = (sbox->x + off) & ~0x3f;
- doff = (dbox->x + off) & ~0x3f;
-
- w = MIN2(sbox->width - off, (0x4000 - 0x40));
- p = align(w, 64);
-
- debug_assert((soff + w) <= fd_bo_size(src->bo));
- debug_assert((doff + w) <= fd_bo_size(dst->bo));
-
- OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
- OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));
-
- /*
- * Emit source:
- */
- OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
- OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
- A5XX_RB_2D_SRC_INFO_TILE_MODE(TILE5_LINEAR) |
- A5XX_RB_2D_SRC_INFO_COLOR_SWAP(WZYX));
- OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */
- OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(p) |
- A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(128));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);
- OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
- A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(WZYX));
-
- /*
- * Emit destination:
- */
- OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
- OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
- A5XX_RB_2D_DST_INFO_TILE_MODE(TILE5_LINEAR) |
- A5XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
- OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
- OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(p) |
- A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(128));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);
- OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
- A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(WZYX));
-
- /*
- * Blit command:
- */
- OUT_PKT7(ring, CP_BLIT, 5);
- OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));
- OUT_RING(ring, CP_BLIT_1_SRC_X1(sshift) | CP_BLIT_1_SRC_Y1(0));
- OUT_RING(ring, CP_BLIT_2_SRC_X2(sshift+w-1) | CP_BLIT_2_SRC_Y2(0));
- OUT_RING(ring, CP_BLIT_3_DST_X1(dshift) | CP_BLIT_3_DST_Y1(0));
- OUT_RING(ring, CP_BLIT_4_DST_X2(dshift+w-1) | CP_BLIT_4_DST_Y2(0));
-
- OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
- OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));
-
- OUT_WFI5(ring);
- }
+ const struct pipe_box *sbox = &info->src.box;
+ const struct pipe_box *dbox = &info->dst.box;
+ struct fd_resource *src, *dst;
+ unsigned sshift, dshift;
+
+ src = fd_resource(info->src.resource);
+ dst = fd_resource(info->dst.resource);
+
+ debug_assert(src->layout.cpp == 1);
+ debug_assert(dst->layout.cpp == 1);
+ debug_assert(info->src.resource->format == info->dst.resource->format);
+ debug_assert((sbox->y == 0) && (sbox->height == 1));
+ debug_assert((dbox->y == 0) && (dbox->height == 1));
+ debug_assert((sbox->z == 0) && (sbox->depth == 1));
+ debug_assert((dbox->z == 0) && (dbox->depth == 1));
+ debug_assert(sbox->width == dbox->width);
+ debug_assert(info->src.level == 0);
+ debug_assert(info->dst.level == 0);
+
+ /*
+ * Buffers can have dimensions bigger than max width, remap into
+ * multiple 1d blits to fit within max dimension
+ *
+ * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
+ * seems to prevent overfetch related faults. Not quite sure what
+ * the deal is there.
+ *
+ * Low 6 bits of SRC/DST addresses need to be zero (ie. address
+ * aligned to 64) so we need to shift src/dst x1/x2 to make up the
+ * difference. On top of already splitting up the blit so width
+ * isn't > 16k.
+ *
+ * We perhaps could do a bit better, if src and dst are aligned but
+ * in the worst case this means we have to split the copy up into
+ * 16k (0x4000) minus 64 (0x40).
+ */
+
+ sshift = sbox->x & 0x3f;
+ dshift = dbox->x & 0x3f;
+
+ for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
+ unsigned soff, doff, w, p;
+
+ soff = (sbox->x + off) & ~0x3f;
+ doff = (dbox->x + off) & ~0x3f;
+
+ w = MIN2(sbox->width - off, (0x4000 - 0x40));
+ p = align(w, 64);
+
+ debug_assert((soff + w) <= fd_bo_size(src->bo));
+ debug_assert((doff + w) <= fd_bo_size(dst->bo));
+
+ OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
+ OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));
+
+ /*
+ * Emit source:
+ */
+ OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
+ OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
+ A5XX_RB_2D_SRC_INFO_TILE_MODE(TILE5_LINEAR) |
+ A5XX_RB_2D_SRC_INFO_COLOR_SWAP(WZYX));
+ OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */
+ OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(p) |
+ A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(128));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);
+ OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
+ A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(WZYX));
+
+ /*
+ * Emit destination:
+ */
+ OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
+ OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
+ A5XX_RB_2D_DST_INFO_TILE_MODE(TILE5_LINEAR) |
+ A5XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
+ OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
+ OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(p) |
+ A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(128));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);
+ OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
+ A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(WZYX));
+
+ /*
+ * Blit command:
+ */
+ OUT_PKT7(ring, CP_BLIT, 5);
+ OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));
+ OUT_RING(ring, CP_BLIT_1_SRC_X1(sshift) | CP_BLIT_1_SRC_Y1(0));
+ OUT_RING(ring, CP_BLIT_2_SRC_X2(sshift + w - 1) | CP_BLIT_2_SRC_Y2(0));
+ OUT_RING(ring, CP_BLIT_3_DST_X1(dshift) | CP_BLIT_3_DST_Y1(0));
+ OUT_RING(ring, CP_BLIT_4_DST_X2(dshift + w - 1) | CP_BLIT_4_DST_Y2(0));
+
+ OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
+ OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));
+
+ OUT_WFI5(ring);
+ }
}
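
The alignment/splitting dance in emit_blit_buffer() is easier to follow with concrete numbers. A standalone sketch of the same arithmetic (the 40000-byte copy starting at byte 100 is a made-up example):

#include <stdio.h>

int
main(void)
{
   const unsigned chunk = 0x4000 - 0x40; /* 16k minus 64, as in the loop above */
   unsigned x = 100, width = 40000;      /* hypothetical 1d copy */
   unsigned shift = x & 0x3f;            /* low 6 address bits must be zero */
   for (unsigned off = 0; off < width; off += chunk) {
      unsigned base = (x + off) & ~0x3fu; /* 64-byte aligned base address */
      unsigned w = (width - off < chunk) ? width - off : chunk;
      printf("base=%6u x1=%2u x2=%5u (w=%u)\n", base, shift, shift + w - 1, w);
   }
   return 0; /* three blits: 16320 + 16320 + 7360 = 40000 bytes */
}

Note that x2 never exceeds 16383 here, so each sub-blit stays inside the 16k coordinate limit the comment describes.
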
static void
emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
{
- const struct pipe_box *sbox = &info->src.box;
- const struct pipe_box *dbox = &info->dst.box;
- struct fd_resource *src, *dst;
- struct fdl_slice *sslice, *dslice;
- enum a5xx_color_fmt sfmt, dfmt;
- enum a5xx_tile_mode stile, dtile;
- enum a3xx_color_swap sswap, dswap;
- unsigned ssize, dsize, spitch, dpitch;
- unsigned sx1, sy1, sx2, sy2;
- unsigned dx1, dy1, dx2, dy2;
-
- src = fd_resource(info->src.resource);
- dst = fd_resource(info->dst.resource);
-
- sslice = fd_resource_slice(src, info->src.level);
- dslice = fd_resource_slice(dst, info->dst.level);
-
- sfmt = fd5_pipe2color(info->src.format);
- dfmt = fd5_pipe2color(info->dst.format);
-
- stile = fd_resource_tile_mode(info->src.resource, info->src.level);
- dtile = fd_resource_tile_mode(info->dst.resource, info->dst.level);
-
- sswap = fd5_pipe2swap(info->src.format);
- dswap = fd5_pipe2swap(info->dst.format);
-
- spitch = fd_resource_pitch(src, info->src.level);
- dpitch = fd_resource_pitch(dst, info->dst.level);
-
- /* if dtile, then dswap ignored by hw, and likewise if stile then sswap
- * ignored by hw.. but in this case we have already rejected the blit
-   * if src and dst formats differ, so just use WZYX for both src and
- * dst swap mode (so we don't change component order)
- */
- if (stile || dtile) {
- debug_assert(info->src.format == info->dst.format);
- sswap = dswap = WZYX;
- }
-
- sx1 = sbox->x;
- sy1 = sbox->y;
- sx2 = sbox->x + sbox->width - 1;
- sy2 = sbox->y + sbox->height - 1;
-
- dx1 = dbox->x;
- dy1 = dbox->y;
- dx2 = dbox->x + dbox->width - 1;
- dy2 = dbox->y + dbox->height - 1;
-
- if (info->src.resource->target == PIPE_TEXTURE_3D)
- ssize = sslice->size0;
- else
- ssize = src->layout.layer_size;
-
- if (info->dst.resource->target == PIPE_TEXTURE_3D)
- dsize = dslice->size0;
- else
- dsize = dst->layout.layer_size;
-
- for (unsigned i = 0; i < info->dst.box.depth; i++) {
- unsigned soff = fd_resource_offset(src, info->src.level, sbox->z + i);
- unsigned doff = fd_resource_offset(dst, info->dst.level, dbox->z + i);
-
- debug_assert((soff + (sbox->height * spitch)) <= fd_bo_size(src->bo));
- debug_assert((doff + (dbox->height * dpitch)) <= fd_bo_size(dst->bo));
-
- OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
- OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));
-
- /*
- * Emit source:
- */
- OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
- OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
- A5XX_RB_2D_SRC_INFO_TILE_MODE(stile) |
- A5XX_RB_2D_SRC_INFO_COLOR_SWAP(sswap));
- OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */
- OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(spitch) |
- A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(ssize));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);
- OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
- A5XX_GRAS_2D_SRC_INFO_TILE_MODE(stile) |
- A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(sswap));
-
- /*
- * Emit destination:
- */
- OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
- OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |
- A5XX_RB_2D_DST_INFO_TILE_MODE(dtile) |
- A5XX_RB_2D_DST_INFO_COLOR_SWAP(dswap));
- OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
- OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(dpitch) |
- A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(dsize));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);
- OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(dfmt) |
- A5XX_GRAS_2D_DST_INFO_TILE_MODE(dtile) |
- A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(dswap));
-
- /*
- * Blit command:
- */
- OUT_PKT7(ring, CP_BLIT, 5);
- OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));
- OUT_RING(ring, CP_BLIT_1_SRC_X1(sx1) | CP_BLIT_1_SRC_Y1(sy1));
- OUT_RING(ring, CP_BLIT_2_SRC_X2(sx2) | CP_BLIT_2_SRC_Y2(sy2));
- OUT_RING(ring, CP_BLIT_3_DST_X1(dx1) | CP_BLIT_3_DST_Y1(dy1));
- OUT_RING(ring, CP_BLIT_4_DST_X2(dx2) | CP_BLIT_4_DST_Y2(dy2));
-
- OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
- OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));
- }
+ const struct pipe_box *sbox = &info->src.box;
+ const struct pipe_box *dbox = &info->dst.box;
+ struct fd_resource *src, *dst;
+ struct fdl_slice *sslice, *dslice;
+ enum a5xx_color_fmt sfmt, dfmt;
+ enum a5xx_tile_mode stile, dtile;
+ enum a3xx_color_swap sswap, dswap;
+ unsigned ssize, dsize, spitch, dpitch;
+ unsigned sx1, sy1, sx2, sy2;
+ unsigned dx1, dy1, dx2, dy2;
+
+ src = fd_resource(info->src.resource);
+ dst = fd_resource(info->dst.resource);
+
+ sslice = fd_resource_slice(src, info->src.level);
+ dslice = fd_resource_slice(dst, info->dst.level);
+
+ sfmt = fd5_pipe2color(info->src.format);
+ dfmt = fd5_pipe2color(info->dst.format);
+
+ stile = fd_resource_tile_mode(info->src.resource, info->src.level);
+ dtile = fd_resource_tile_mode(info->dst.resource, info->dst.level);
+
+ sswap = fd5_pipe2swap(info->src.format);
+ dswap = fd5_pipe2swap(info->dst.format);
+
+ spitch = fd_resource_pitch(src, info->src.level);
+ dpitch = fd_resource_pitch(dst, info->dst.level);
+
+ /* if dtile, then dswap ignored by hw, and likewise if stile then sswap
+ * ignored by hw.. but in this case we have already rejected the blit
+    * if src and dst formats differ, so just use WZYX for both src and
+ * dst swap mode (so we don't change component order)
+ */
+ if (stile || dtile) {
+ debug_assert(info->src.format == info->dst.format);
+ sswap = dswap = WZYX;
+ }
+
+ sx1 = sbox->x;
+ sy1 = sbox->y;
+ sx2 = sbox->x + sbox->width - 1;
+ sy2 = sbox->y + sbox->height - 1;
+
+ dx1 = dbox->x;
+ dy1 = dbox->y;
+ dx2 = dbox->x + dbox->width - 1;
+ dy2 = dbox->y + dbox->height - 1;
+
+ if (info->src.resource->target == PIPE_TEXTURE_3D)
+ ssize = sslice->size0;
+ else
+ ssize = src->layout.layer_size;
+
+ if (info->dst.resource->target == PIPE_TEXTURE_3D)
+ dsize = dslice->size0;
+ else
+ dsize = dst->layout.layer_size;
+
+ for (unsigned i = 0; i < info->dst.box.depth; i++) {
+ unsigned soff = fd_resource_offset(src, info->src.level, sbox->z + i);
+ unsigned doff = fd_resource_offset(dst, info->dst.level, dbox->z + i);
+
+ debug_assert((soff + (sbox->height * spitch)) <= fd_bo_size(src->bo));
+ debug_assert((doff + (dbox->height * dpitch)) <= fd_bo_size(dst->bo));
+
+ OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
+ OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));
+
+ /*
+ * Emit source:
+ */
+ OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
+ OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
+ A5XX_RB_2D_SRC_INFO_TILE_MODE(stile) |
+ A5XX_RB_2D_SRC_INFO_COLOR_SWAP(sswap));
+ OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */
+ OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(spitch) |
+ A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(ssize));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);
+ OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
+ A5XX_GRAS_2D_SRC_INFO_TILE_MODE(stile) |
+ A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(sswap));
+
+ /*
+ * Emit destination:
+ */
+ OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
+ OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |
+ A5XX_RB_2D_DST_INFO_TILE_MODE(dtile) |
+ A5XX_RB_2D_DST_INFO_COLOR_SWAP(dswap));
+ OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
+ OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(dpitch) |
+ A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(dsize));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);
+ OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(dfmt) |
+ A5XX_GRAS_2D_DST_INFO_TILE_MODE(dtile) |
+ A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(dswap));
+
+ /*
+ * Blit command:
+ */
+ OUT_PKT7(ring, CP_BLIT, 5);
+ OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));
+ OUT_RING(ring, CP_BLIT_1_SRC_X1(sx1) | CP_BLIT_1_SRC_Y1(sy1));
+ OUT_RING(ring, CP_BLIT_2_SRC_X2(sx2) | CP_BLIT_2_SRC_Y2(sy2));
+ OUT_RING(ring, CP_BLIT_3_DST_X1(dx1) | CP_BLIT_3_DST_Y1(dy1));
+ OUT_RING(ring, CP_BLIT_4_DST_X2(dx2) | CP_BLIT_4_DST_Y2(dy2));
+
+ OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
+ OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));
+ }
}
bool
-fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
- assert_dt
+fd5_blitter_blit(struct fd_context *ctx,
+ const struct pipe_blit_info *info) assert_dt
{
- struct fd_batch *batch;
+ struct fd_batch *batch;
- if (!can_do_blit(info)) {
- return false;
- }
+ if (!can_do_blit(info)) {
+ return false;
+ }
- batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
+ batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
- fd_batch_update_queries(batch);
+ fd_batch_update_queries(batch);
- emit_setup(batch->draw);
+ emit_setup(batch->draw);
- if ((info->src.resource->target == PIPE_BUFFER) &&
- (info->dst.resource->target == PIPE_BUFFER)) {
- assert(fd_resource(info->src.resource)->layout.tile_mode == TILE5_LINEAR);
- assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE5_LINEAR);
- emit_blit_buffer(batch->draw, info);
- } else {
- /* I don't *think* we need to handle blits between buffer <-> !buffer */
- debug_assert(info->src.resource->target != PIPE_BUFFER);
- debug_assert(info->dst.resource->target != PIPE_BUFFER);
- emit_blit(batch->draw, info);
- }
+ if ((info->src.resource->target == PIPE_BUFFER) &&
+ (info->dst.resource->target == PIPE_BUFFER)) {
+ assert(fd_resource(info->src.resource)->layout.tile_mode == TILE5_LINEAR);
+ assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE5_LINEAR);
+ emit_blit_buffer(batch->draw, info);
+ } else {
+ /* I don't *think* we need to handle blits between buffer <-> !buffer */
+ debug_assert(info->src.resource->target != PIPE_BUFFER);
+ debug_assert(info->dst.resource->target != PIPE_BUFFER);
+ emit_blit(batch->draw, info);
+ }
- fd_resource(info->dst.resource)->valid = true;
- batch->needs_flush = true;
+ fd_resource(info->dst.resource)->valid = true;
+ batch->needs_flush = true;
- fd_batch_flush(batch);
- fd_batch_reference(&batch, NULL);
+ fd_batch_flush(batch);
+ fd_batch_reference(&batch, NULL);
- /* Acc query state will have been dirtied by our fd_batch_update_queries, so
- * the ctx->batch may need to turn its queries back on.
- */
- ctx->update_active_queries = true;
+ /* Acc query state will have been dirtied by our fd_batch_update_queries, so
+ * the ctx->batch may need to turn its queries back on.
+ */
+ ctx->update_active_queries = true;
- return true;
+ return true;
}
unsigned
fd5_tile_mode(const struct pipe_resource *tmpl)
{
- /* basically just has to be a format we can blit, so uploads/downloads
-    * via a linear staging buffer work:
- */
- if (ok_format(tmpl->format))
- return TILE5_3;
+ /* basically just has to be a format we can blit, so uploads/downloads
+    * via a linear staging buffer work:
+ */
+ if (ok_format(tmpl->format))
+ return TILE5_3;
- return TILE5_LINEAR;
+ return TILE5_LINEAR;
}
#include "freedreno_context.h"
-bool fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info);
+bool fd5_blitter_blit(struct fd_context *ctx,
+ const struct pipe_blit_info *info);
unsigned fd5_tile_mode(const struct pipe_resource *tmpl);
#endif /* FD5_BLIT_H_ */
#include "fd5_context.h"
#include "fd5_emit.h"
-
/* maybe move to fd5_program? */
static void
cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
{
- const struct ir3_info *i = &v->info;
- enum a3xx_threadsize thrsz = i->double_threadsize ? FOUR_QUADS : TWO_QUADS;
- unsigned instrlen = v->instrlen;
-
- /* if shader is more than 32*16 instructions, don't preload it. Similar
- * to the combined restriction of 64*16 for VS+FS
- */
- if (instrlen > 32)
- instrlen = 0;
-
- OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* SP_SP_CNTL */
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 1);
- OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS) |
- A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(thrsz) |
- 0x00000880 /* XXX */);
-
- OUT_PKT4(ring, REG_A5XX_SP_CS_CTRL_REG0, 1);
- OUT_RING(ring, A5XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
- A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
- A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
- A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow..
- 0x6 /* XXX */);
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
- OUT_RING(ring, A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(0) |
- A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(0) |
- A5XX_HLSQ_CS_CONFIG_ENABLED);
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CNTL, 1);
- OUT_RING(ring, A5XX_HLSQ_CS_CNTL_INSTRLEN(instrlen) |
- COND(v->has_ssbo, A5XX_HLSQ_CS_CNTL_SSBO_ENABLE));
-
- OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
- OUT_RING(ring, A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(0) |
- A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(0) |
- A5XX_SP_CS_CONFIG_ENABLED);
-
- assert(v->constlen % 4 == 0);
- unsigned constlen = v->constlen / 4;
- OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2);
- OUT_RING(ring, constlen); /* HLSQ_CS_CONSTLEN */
- OUT_RING(ring, instrlen); /* HLSQ_CS_INSTRLEN */
-
- OUT_PKT4(ring, REG_A5XX_SP_CS_OBJ_START_LO, 2);
- OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
- OUT_RING(ring, 0x1f00000);
-
- uint32_t local_invocation_id, work_group_id;
- local_invocation_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
- work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORK_GROUP_ID);
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CNTL_0, 2);
- OUT_RING(ring, A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
- A5XX_HLSQ_CS_CNTL_0_UNK0(regid(63, 0)) |
- A5XX_HLSQ_CS_CNTL_0_UNK1(regid(63, 0)) |
- A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
- OUT_RING(ring, 0x1); /* HLSQ_CS_CNTL_1 */
-
- if (instrlen > 0)
- fd5_emit_shader(ring, v);
+ const struct ir3_info *i = &v->info;
+ enum a3xx_threadsize thrsz = i->double_threadsize ? FOUR_QUADS : TWO_QUADS;
+ unsigned instrlen = v->instrlen;
+
+ /* if shader is more than 32*16 instructions, don't preload it. Similar
+ * to the combined restriction of 64*16 for VS+FS
+ */
+ if (instrlen > 32)
+ instrlen = 0;
+
+ OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* SP_SP_CNTL */
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 1);
+ OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS) |
+ A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(thrsz) |
+ 0x00000880 /* XXX */);
+
+ OUT_PKT4(ring, REG_A5XX_SP_CS_CTRL_REG0, 1);
+ OUT_RING(ring,
+ A5XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
+ A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
+ A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
+ A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(
+ 0x3) | // XXX need to figure this out somehow..
+ 0x6 /* XXX */);
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
+ OUT_RING(ring, A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(0) |
+ A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(0) |
+ A5XX_HLSQ_CS_CONFIG_ENABLED);
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CNTL, 1);
+ OUT_RING(ring, A5XX_HLSQ_CS_CNTL_INSTRLEN(instrlen) |
+ COND(v->has_ssbo, A5XX_HLSQ_CS_CNTL_SSBO_ENABLE));
+
+ OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
+ OUT_RING(ring, A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(0) |
+ A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(0) |
+ A5XX_SP_CS_CONFIG_ENABLED);
+
+ assert(v->constlen % 4 == 0);
+ unsigned constlen = v->constlen / 4;
+ OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2);
+ OUT_RING(ring, constlen); /* HLSQ_CS_CONSTLEN */
+ OUT_RING(ring, instrlen); /* HLSQ_CS_INSTRLEN */
+
+ OUT_PKT4(ring, REG_A5XX_SP_CS_OBJ_START_LO, 2);
+ OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
+ OUT_RING(ring, 0x1f00000);
+
+ uint32_t local_invocation_id, work_group_id;
+ local_invocation_id =
+ ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
+ work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORK_GROUP_ID);
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CNTL_0, 2);
+ OUT_RING(ring, A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
+ A5XX_HLSQ_CS_CNTL_0_UNK0(regid(63, 0)) |
+ A5XX_HLSQ_CS_CNTL_0_UNK1(regid(63, 0)) |
+ A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
+ OUT_RING(ring, 0x1); /* HLSQ_CS_CNTL_1 */
+
+ if (instrlen > 0)
+ fd5_emit_shader(ring, v);
}
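/* A minimal standalone sketch (not driver code) of the two size
 * conversions done in cs_program_emit() above: instrlen counts groups of
 * 16 instructions and is forced to 0 (no preload) above 32, per the
 * comment in the code, and constlen is divided by 4 before being written
 * to HLSQ_CS_CONSTLEN, hence the alignment assert. Values are
 * hypothetical.
 */
#include <assert.h>
#include <stdio.h>

int
main(void)
{
   unsigned instrlen = 40; /* stand-in for v->instrlen */
   unsigned constlen = 64; /* stand-in for v->constlen */

   if (instrlen > 32) /* too large to preload */
      instrlen = 0;

   assert(constlen % 4 == 0);
   printf("INSTRLEN=%u CONSTLEN=%u\n", instrlen, constlen / 4);
   return 0;
}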
static void
-fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
- assert_dt
+fd5_launch_grid(struct fd_context *ctx,
+ const struct pipe_grid_info *info) assert_dt
{
- struct ir3_shader_key key = {};
- struct ir3_shader_variant *v;
- struct fd_ringbuffer *ring = ctx->batch->draw;
- unsigned nglobal = 0;
-
- v = ir3_shader_variant(ir3_get_shader(ctx->compute), key, false, &ctx->debug);
- if (!v)
- return;
-
- if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)
- cs_program_emit(ring, v);
-
- fd5_emit_cs_state(ctx, ring, v);
- fd5_emit_cs_consts(v, ring, ctx, info);
-
- u_foreach_bit(i, ctx->global_bindings.enabled_mask)
- nglobal++;
-
- if (nglobal > 0) {
- /* global resources don't otherwise get an OUT_RELOC(), since
- * the raw ptr address is emitted ir ir3_emit_cs_consts().
- * So to make the kernel aware that these buffers are referenced
- * by the batch, emit dummy reloc's as part of a no-op packet
- * payload:
- */
- OUT_PKT7(ring, CP_NOP, 2 * nglobal);
- u_foreach_bit(i, ctx->global_bindings.enabled_mask) {
- struct pipe_resource *prsc = ctx->global_bindings.buf[i];
- OUT_RELOC(ring, fd_resource(prsc)->bo, 0, 0, 0);
- }
- }
-
- const unsigned *local_size = info->block; // v->shader->nir->info->cs.local_size;
- const unsigned *num_groups = info->grid;
- /* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
- const unsigned work_dim = info->work_dim ? info->work_dim : 3;
- OUT_PKT4(ring, REG_A5XX_HLSQ_CS_NDRANGE_0, 7);
- OUT_RING(ring, A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) |
- A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) |
- A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) |
- A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1));
- OUT_RING(ring, A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0]));
- OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */
- OUT_RING(ring, A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1]));
- OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */
- OUT_RING(ring, A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2]));
- OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_CS_KERNEL_GROUP_X, 3);
- OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */
- OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
- OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
-
- if (info->indirect) {
- struct fd_resource *rsc = fd_resource(info->indirect);
-
- fd5_emit_flush(ctx, ring);
-
- OUT_PKT7(ring, CP_EXEC_CS_INDIRECT, 4);
- OUT_RING(ring, 0x00000000);
- OUT_RELOC(ring, rsc->bo, info->indirect_offset, 0, 0); /* ADDR_LO/HI */
- OUT_RING(ring, A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
- A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
- A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
- } else {
- OUT_PKT7(ring, CP_EXEC_CS, 4);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(info->grid[0]));
- OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(info->grid[1]));
- OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(info->grid[2]));
- }
+ struct ir3_shader_key key = {};
+ struct ir3_shader_variant *v;
+ struct fd_ringbuffer *ring = ctx->batch->draw;
+ unsigned nglobal = 0;
+
+ v =
+ ir3_shader_variant(ir3_get_shader(ctx->compute), key, false, &ctx->debug);
+ if (!v)
+ return;
+
+ if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)
+ cs_program_emit(ring, v);
+
+ fd5_emit_cs_state(ctx, ring, v);
+ fd5_emit_cs_consts(v, ring, ctx, info);
+
+ u_foreach_bit (i, ctx->global_bindings.enabled_mask)
+ nglobal++;
+
+ if (nglobal > 0) {
+ /* global resources don't otherwise get an OUT_RELOC(), since
+ * the raw ptr address is emitted in ir3_emit_cs_consts().
+ * So to make the kernel aware that these buffers are referenced
+ * by the batch, emit dummy relocs as part of a no-op packet
+ * payload:
+ */
+ OUT_PKT7(ring, CP_NOP, 2 * nglobal);
+ u_foreach_bit (i, ctx->global_bindings.enabled_mask) {
+ struct pipe_resource *prsc = ctx->global_bindings.buf[i];
+ OUT_RELOC(ring, fd_resource(prsc)->bo, 0, 0, 0);
+ }
+ }
+
+ const unsigned *local_size =
+ info->block; // v->shader->nir->info->cs.local_size;
+ const unsigned *num_groups = info->grid;
+ /* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
+ const unsigned work_dim = info->work_dim ? info->work_dim : 3;
+ OUT_PKT4(ring, REG_A5XX_HLSQ_CS_NDRANGE_0, 7);
+ OUT_RING(ring, A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) |
+ A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) |
+ A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) |
+ A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1));
+ OUT_RING(ring,
+ A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0]));
+ OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */
+ OUT_RING(ring,
+ A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1]));
+ OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */
+ OUT_RING(ring,
+ A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2]));
+ OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_CS_KERNEL_GROUP_X, 3);
+ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */
+ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
+ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
+
+ if (info->indirect) {
+ struct fd_resource *rsc = fd_resource(info->indirect);
+
+ fd5_emit_flush(ctx, ring);
+
+ OUT_PKT7(ring, CP_EXEC_CS_INDIRECT, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RELOC(ring, rsc->bo, info->indirect_offset, 0, 0); /* ADDR_LO/HI */
+ OUT_RING(ring,
+ A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
+ A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
+ A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
+ } else {
+ OUT_PKT7(ring, CP_EXEC_CS, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(info->grid[0]));
+ OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(info->grid[1]));
+ OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(info->grid[2]));
+ }
}
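/* A standalone sketch of the ND-range arithmetic that fd5_launch_grid()
 * programs into HLSQ_CS_NDRANGE_*: each LOCALSIZE field is written biased
 * by -1, and each GLOBALSIZE is local_size * num_groups. The block/grid
 * values below are hypothetical.
 */
#include <stdio.h>

int
main(void)
{
   const unsigned block[3] = {64, 1, 1}; /* stand-in for info->block */
   const unsigned grid[3] = {4, 2, 1};   /* stand-in for info->grid  */

   for (int i = 0; i < 3; i++) {
      unsigned localsize = block[i] - 1;        /* LOCALSIZEX/Y/Z field */
      unsigned globalsize = block[i] * grid[i]; /* GLOBALSIZE_X/Y/Z     */
      printf("axis %d: LOCALSIZE=%u GLOBALSIZE=%u\n", i, localsize,
             globalsize);
   }
   return 0;
}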
void
-fd5_compute_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd5_compute_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->launch_grid = fd5_launch_grid;
- pctx->create_compute_state = ir3_shader_compute_state_create;
- pctx->delete_compute_state = ir3_shader_state_delete;
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->launch_grid = fd5_launch_grid;
+ pctx->create_compute_state = ir3_shader_compute_state_create;
+ pctx->delete_compute_state = ir3_shader_state_delete;
}
#include "freedreno_query_acc.h"
-#include "fd5_context.h"
#include "fd5_blend.h"
#include "fd5_blitter.h"
#include "fd5_compute.h"
+#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_gmem.h"
#include "fd5_zsa.h"
static void
-fd5_context_destroy(struct pipe_context *pctx)
- in_dt
+fd5_context_destroy(struct pipe_context *pctx) in_dt
{
- struct fd5_context *fd5_ctx = fd5_context(fd_context(pctx));
+ struct fd5_context *fd5_ctx = fd5_context(fd_context(pctx));
- u_upload_destroy(fd5_ctx->border_color_uploader);
- pipe_resource_reference(&fd5_ctx->border_color_buf, NULL);
+ u_upload_destroy(fd5_ctx->border_color_uploader);
+ pipe_resource_reference(&fd5_ctx->border_color_buf, NULL);
- fd_context_destroy(pctx);
+ fd_context_destroy(pctx);
- fd_bo_del(fd5_ctx->vsc_size_mem);
- fd_bo_del(fd5_ctx->blit_mem);
+ fd_bo_del(fd5_ctx->vsc_size_mem);
+ fd_bo_del(fd5_ctx->blit_mem);
- fd_context_cleanup_common_vbos(&fd5_ctx->base);
+ fd_context_cleanup_common_vbos(&fd5_ctx->base);
- free(fd5_ctx);
+ free(fd5_ctx);
}
/* clang-format off */
/* clang-format on */
struct pipe_context *
-fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
- disable_thread_safety_analysis
+fd5_context_create(struct pipe_screen *pscreen, void *priv,
+ unsigned flags) disable_thread_safety_analysis
{
- struct fd_screen *screen = fd_screen(pscreen);
- struct fd5_context *fd5_ctx = CALLOC_STRUCT(fd5_context);
- struct pipe_context *pctx;
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd5_context *fd5_ctx = CALLOC_STRUCT(fd5_context);
+ struct pipe_context *pctx;
- if (!fd5_ctx)
- return NULL;
+ if (!fd5_ctx)
+ return NULL;
- pctx = &fd5_ctx->base.base;
- pctx->screen = pscreen;
+ pctx = &fd5_ctx->base.base;
+ pctx->screen = pscreen;
- fd5_ctx->base.dev = fd_device_ref(screen->dev);
- fd5_ctx->base.screen = fd_screen(pscreen);
- fd5_ctx->base.last.key = &fd5_ctx->last_key;
+ fd5_ctx->base.dev = fd_device_ref(screen->dev);
+ fd5_ctx->base.screen = fd_screen(pscreen);
+ fd5_ctx->base.last.key = &fd5_ctx->last_key;
- pctx->destroy = fd5_context_destroy;
- pctx->create_blend_state = fd5_blend_state_create;
- pctx->create_rasterizer_state = fd5_rasterizer_state_create;
- pctx->create_depth_stencil_alpha_state = fd5_zsa_state_create;
+ pctx->destroy = fd5_context_destroy;
+ pctx->create_blend_state = fd5_blend_state_create;
+ pctx->create_rasterizer_state = fd5_rasterizer_state_create;
+ pctx->create_depth_stencil_alpha_state = fd5_zsa_state_create;
- fd5_draw_init(pctx);
- fd5_compute_init(pctx);
- fd5_gmem_init(pctx);
- fd5_texture_init(pctx);
- fd5_prog_init(pctx);
- fd5_emit_init(pctx);
+ fd5_draw_init(pctx);
+ fd5_compute_init(pctx);
+ fd5_gmem_init(pctx);
+ fd5_texture_init(pctx);
+ fd5_prog_init(pctx);
+ fd5_emit_init(pctx);
- if (!FD_DBG(NOBLIT))
- fd5_ctx->base.blit = fd5_blitter_blit;
+ if (!FD_DBG(NOBLIT))
+ fd5_ctx->base.blit = fd5_blitter_blit;
- pctx = fd_context_init(&fd5_ctx->base, pscreen, primtypes, priv, flags);
- if (!pctx)
- return NULL;
+ pctx = fd_context_init(&fd5_ctx->base, pscreen, primtypes, priv, flags);
+ if (!pctx)
+ return NULL;
- util_blitter_set_texture_multisample(fd5_ctx->base.blitter, true);
+ util_blitter_set_texture_multisample(fd5_ctx->base.blitter, true);
- fd5_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
+ fd5_ctx->vsc_size_mem =
+ fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
- fd5_ctx->blit_mem = fd_bo_new(screen->dev, 0x1000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "blit");
+ fd5_ctx->blit_mem =
+ fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "blit");
- fd_context_setup_common_vbos(&fd5_ctx->base);
+ fd_context_setup_common_vbos(&fd5_ctx->base);
- fd5_query_context_init(pctx);
+ fd5_query_context_init(pctx);
- fd5_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
- PIPE_USAGE_STREAM, 0);
+ fd5_ctx->border_color_uploader =
+ u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
- return pctx;
+ return pctx;
}
#include "ir3/ir3_shader.h"
struct fd5_context {
- struct fd_context base;
+ struct fd_context base;
- /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
- * could combine it with another allocation.
- */
- struct fd_bo *vsc_size_mem;
+ /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
+ * could combine it with another allocation.
+ */
+ struct fd_bo *vsc_size_mem;
- /* TODO not sure what this is for.. probably similar to
- * CACHE_FLUSH_TS on kernel side, where value gets written
- * to this address synchronized w/ 3d (ie. a way to
- * synchronize when the CP is running far ahead)
- */
- struct fd_bo *blit_mem;
+ /* TODO not sure what this is for.. probably similar to
+ * CACHE_FLUSH_TS on kernel side, where value gets written
+ * to this address synchronized w/ 3d (ie. a way to
+ * synchronize when the CP is running far ahead)
+ */
+ struct fd_bo *blit_mem;
- struct u_upload_mgr *border_color_uploader;
- struct pipe_resource *border_color_buf;
+ struct u_upload_mgr *border_color_uploader;
+ struct pipe_resource *border_color_buf;
- /* bitmask of samplers which need astc srgb workaround: */
- uint16_t vastc_srgb, fastc_srgb;
+ /* bitmask of samplers which need astc srgb workaround: */
+ uint16_t vastc_srgb, fastc_srgb;
- /* storage for ctx->last.key: */
- struct ir3_shader_key last_key;
+ /* storage for ctx->last.key: */
+ struct ir3_shader_key last_key;
- /* number of active samples-passed queries: */
- int samples_passed_queries;
+ /* number of active samples-passed queries: */
+ int samples_passed_queries;
- /* cached state about current emitted shader program (3d): */
- unsigned max_loc;
+ /* cached state about current emitted shader program (3d): */
+ unsigned max_loc;
};
static inline struct fd5_context *
fd5_context(struct fd_context *ctx)
{
- return (struct fd5_context *)ctx;
+ return (struct fd5_context *)ctx;
}
-struct pipe_context *
-fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
+struct pipe_context *fd5_context_create(struct pipe_screen *pscreen, void *priv,
+ unsigned flags);
/* helper for places where we need to stall CP to wait for previous draws: */
static inline void
fd5_emit_flush(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
- OUT_PKT7(ring, CP_EVENT_WRITE, 4);
- OUT_RING(ring, CACHE_FLUSH_TS);
- OUT_RELOC(ring, fd5_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */
- OUT_RING(ring, 0x00000000);
+ OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+ OUT_RING(ring, CACHE_FLUSH_TS);
+ OUT_RELOC(ring, fd5_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */
+ OUT_RING(ring, 0x00000000);
- OUT_WFI5(ring);
+ OUT_WFI5(ring);
}
#endif /* FD5_CONTEXT_H_ */
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
+#include "util/u_string.h"
-#include "freedreno_state.h"
#include "freedreno_resource.h"
+#include "freedreno_state.h"
-#include "fd5_draw.h"
#include "fd5_context.h"
+#include "fd5_draw.h"
#include "fd5_emit.h"
-#include "fd5_program.h"
#include "fd5_format.h"
+#include "fd5_program.h"
#include "fd5_zsa.h"
-
static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd5_emit *emit, unsigned index_offset)
- assert_dt
+ struct fd5_emit *emit, unsigned index_offset) assert_dt
{
- const struct pipe_draw_info *info = emit->info;
- enum pc_di_primtype primtype = ctx->primtypes[info->mode];
+ const struct pipe_draw_info *info = emit->info;
+ enum pc_di_primtype primtype = ctx->primtypes[info->mode];
- fd5_emit_state(ctx, ring, emit);
+ fd5_emit_state(ctx, ring, emit);
- if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
- fd5_emit_vertex_bufs(ring, emit);
+ if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
+ fd5_emit_vertex_bufs(ring, emit);
- OUT_PKT4(ring, REG_A5XX_VFD_INDEX_OFFSET, 2);
- OUT_RING(ring, info->index_size ? info->index_bias : emit->draw->start); /* VFD_INDEX_OFFSET */
- OUT_RING(ring, info->start_instance); /* VFD_INSTANCE_START_OFFSET */
+ OUT_PKT4(ring, REG_A5XX_VFD_INDEX_OFFSET, 2);
+ OUT_RING(ring, info->index_size ? info->index_bias
+ : emit->draw->start); /* VFD_INDEX_OFFSET */
+ OUT_RING(ring, info->start_instance); /* VFD_INSTANCE_START_OFFSET */
- OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1);
- OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
- info->restart_index : 0xffffffff);
+ OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
+ info->restart_index
+ : 0xffffffff);
- fd5_emit_render_cntl(ctx, false, emit->binning_pass);
- fd5_draw_emit(ctx->batch, ring, primtype,
- emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
- info, emit->indirect, emit->draw, index_offset);
+ fd5_emit_render_cntl(ctx, false, emit->binning_pass);
+ fd5_draw_emit(ctx->batch, ring, primtype,
+ emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info,
+ emit->indirect, emit->draw, index_offset);
}
static bool
fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw,
- unsigned index_offset)
- in_dt
+ unsigned index_offset) in_dt
{
- struct fd5_context *fd5_ctx = fd5_context(ctx);
- struct fd5_emit emit = {
- .debug = &ctx->debug,
- .vtx = &ctx->vtx,
- .info = info,
- .indirect = indirect,
- .draw = draw,
- .key = {
- .vs = ctx->prog.vs,
- .fs = ctx->prog.fs,
- .key = {
- .rasterflat = ctx->rasterizer->flatshade,
- .has_per_samp = fd5_ctx->fastc_srgb || fd5_ctx->vastc_srgb,
- .vastc_srgb = fd5_ctx->vastc_srgb,
- .fastc_srgb = fd5_ctx->fastc_srgb,
- },
- },
- .rasterflat = ctx->rasterizer->flatshade,
- .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
- .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
- };
-
- /* Technically a5xx should not require this, but it avoids a crash in
- * piglit 'spec@!opengl 1.1@ppgtt_memory_alignment' due to a draw with
- * no VBO bound but a VS that expects an input. The draw is a single
- * vertex with PIPE_PRIM_TRIANGLES so the u_trim_pipe_prim() causes it
- * to be skipped.
- */
- if (info->mode != PIPE_PRIM_MAX &&
- !indirect &&
- !info->primitive_restart &&
- !u_trim_pipe_prim(info->mode, (unsigned*)&draw->count))
- return false;
-
- ir3_fixup_shader_state(&ctx->base, &emit.key.key);
-
- unsigned dirty = ctx->dirty;
-
- emit.prog = fd5_program_state(ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
-
- /* bail if compile failed: */
- if (!emit.prog)
- return false;
-
- const struct ir3_shader_variant *vp = fd5_emit_get_vp(&emit);
- const struct ir3_shader_variant *fp = fd5_emit_get_fp(&emit);
-
- ir3_update_max_tf_vtx(ctx, vp);
-
- /* do regular pass first: */
-
- if (unlikely(ctx->stats_users > 0)) {
- ctx->stats.vs_regs += ir3_shader_halfregs(vp);
- ctx->stats.fs_regs += ir3_shader_halfregs(fp);
- }
-
- /* figure out whether we need to disable LRZ write for binning
- * pass using draw pass's fp:
- */
- emit.no_lrz_write = fp->writes_pos || fp->no_earlyz || fp->has_kill;
-
- emit.binning_pass = false;
- emit.dirty = dirty;
-
- draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
-
- /* and now binning pass: */
- emit.binning_pass = true;
- emit.dirty = dirty & ~(FD_DIRTY_BLEND);
- emit.vs = NULL; /* we changed key so need to refetch vp */
- emit.fs = NULL;
- draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
-
- if (emit.streamout_mask) {
- struct fd_ringbuffer *ring = ctx->batch->draw;
-
- for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
- if (emit.streamout_mask & (1 << i)) {
- fd5_event_write(ctx->batch, ring, FLUSH_SO_0 + i, false);
- }
- }
- }
-
- fd_context_all_clean(ctx);
-
- return true;
+ struct fd5_context *fd5_ctx = fd5_context(ctx);
+ struct fd5_emit emit = {
+ .debug = &ctx->debug,
+ .vtx = &ctx->vtx,
+ .info = info,
+ .indirect = indirect,
+ .draw = draw,
+ .key =
+ {
+ .vs = ctx->prog.vs,
+ .fs = ctx->prog.fs,
+ .key =
+ {
+ .rasterflat = ctx->rasterizer->flatshade,
+ .has_per_samp = fd5_ctx->fastc_srgb || fd5_ctx->vastc_srgb,
+ .vastc_srgb = fd5_ctx->vastc_srgb,
+ .fastc_srgb = fd5_ctx->fastc_srgb,
+ },
+ },
+ .rasterflat = ctx->rasterizer->flatshade,
+ .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
+ .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
+ };
+
+ /* Technically a5xx should not require this, but it avoids a crash in
+ * piglit 'spec@!opengl 1.1@ppgtt_memory_alignment' due to a draw with
+ * no VBO bound but a VS that expects an input. The draw is a single
+ * vertex with PIPE_PRIM_TRIANGLES so the u_trim_pipe_prim() causes it
+ * to be skipped.
+ */
+ if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
+ !u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
+ return false;
+
+ ir3_fixup_shader_state(&ctx->base, &emit.key.key);
+
+ unsigned dirty = ctx->dirty;
+
+ emit.prog = fd5_program_state(
+ ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
+
+ /* bail if compile failed: */
+ if (!emit.prog)
+ return false;
+
+ const struct ir3_shader_variant *vp = fd5_emit_get_vp(&emit);
+ const struct ir3_shader_variant *fp = fd5_emit_get_fp(&emit);
+
+ ir3_update_max_tf_vtx(ctx, vp);
+
+ /* do regular pass first: */
+
+ if (unlikely(ctx->stats_users > 0)) {
+ ctx->stats.vs_regs += ir3_shader_halfregs(vp);
+ ctx->stats.fs_regs += ir3_shader_halfregs(fp);
+ }
+
+ /* figure out whether we need to disable LRZ write for binning
+ * pass using draw pass's fp:
+ */
+ emit.no_lrz_write = fp->writes_pos || fp->no_earlyz || fp->has_kill;
+
+ emit.binning_pass = false;
+ emit.dirty = dirty;
+
+ draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
+
+ /* and now binning pass: */
+ emit.binning_pass = true;
+ emit.dirty = dirty & ~(FD_DIRTY_BLEND);
+ emit.vs = NULL; /* we changed key so need to refetch vp */
+ emit.fs = NULL;
+ draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
+
+ if (emit.streamout_mask) {
+ struct fd_ringbuffer *ring = ctx->batch->draw;
+
+ for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
+ if (emit.streamout_mask & (1 << i)) {
+ fd5_event_write(ctx->batch, ring, FLUSH_SO_0 + i, false);
+ }
+ }
+ }
+
+ fd_context_all_clean(ctx);
+
+ return true;
}
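/* fd5_draw_vbo() records the same draw twice, once into the draw ring and
 * once into the binning ring, clearing FD_DIRTY_BLEND from the dirty mask
 * for the second pass. A trivial standalone sketch of that mask
 * manipulation; the bit value here is illustrative, not the real enum.
 */
#include <stdio.h>

#define FD_DIRTY_BLEND (1u << 3) /* hypothetical bit, for illustration */

int
main(void)
{
   unsigned dirty = 0xff; /* stand-in for ctx->dirty */

   printf("draw pass dirty:    0x%02x\n", dirty);
   printf("binning pass dirty: 0x%02x\n", dirty & ~FD_DIRTY_BLEND);
   return 0;
}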
-static bool is_z32(enum pipe_format format)
+static bool
+is_z32(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- case PIPE_FORMAT_Z32_UNORM:
- case PIPE_FORMAT_Z32_FLOAT:
- return true;
- default:
- return false;
- }
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case PIPE_FORMAT_Z32_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
+ return true;
+ default:
+ return false;
+ }
}
static void
fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
{
- struct fd_ringbuffer *ring;
- uint32_t clear = util_pack_z(PIPE_FORMAT_Z16_UNORM, depth);
+ struct fd_ringbuffer *ring;
+ uint32_t clear = util_pack_z(PIPE_FORMAT_Z16_UNORM, depth);
- ring = fd_batch_get_prologue(batch);
+ ring = fd_batch_get_prologue(batch);
- OUT_WFI5(ring);
+ OUT_WFI5(ring);
- OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
- OUT_RING(ring, 0x10000000);
+ OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
+ OUT_RING(ring, 0x10000000);
- OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
- OUT_RING(ring, 0x20fffff);
+ OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
+ OUT_RING(ring, 0x20fffff);
- OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
- OUT_RING(ring, A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(0.0) |
- COND(zsbuf->b.b.nr_samples > 1, A5XX_GRAS_SU_CNTL_MSAA_ENABLE));
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
+ OUT_RING(ring,
+ A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(0.0) |
+ COND(zsbuf->b.b.nr_samples > 1, A5XX_GRAS_SU_CNTL_MSAA_ENABLE));
- OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
- OUT_RING(ring, 0x00000181);
+ OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
+ OUT_RING(ring, 0x00000181);
- OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
- OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) |
- A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) |
- A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
- OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2));
- OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz)));
- OUT_RELOC(ring, zsbuf->lrz, 0x1000, 0, 0);
+ OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
+ OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) |
+ A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) |
+ A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
+ OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2));
+ OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz)));
+ OUT_RELOC(ring, zsbuf->lrz, 0x1000, 0, 0);
- OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT4(ring, REG_A5XX_RB_DEST_MSAA_CNTL, 1);
- OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE));
+ OUT_PKT4(ring, REG_A5XX_RB_DEST_MSAA_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE));
- OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
- OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0));
+ OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0));
- OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
- OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR |
- A5XX_RB_CLEAR_CNTL_MASK(0xf));
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | A5XX_RB_CLEAR_CNTL_MASK(0xf));
- OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
- OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
+ OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
- OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2);
- OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) |
- A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height));
- OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE
+ OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2);
+ OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) |
+ A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height));
+ OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE
- OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
- OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
+ OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
- OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
- OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) |
- A5XX_RB_RESOLVE_CNTL_1_Y(0));
- OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) |
- A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1));
+ OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) |
+ A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1));
- fd5_emit_blit(batch, ring);
+ fd5_emit_blit(batch, ring);
}
static bool
fd5_clear(struct fd_context *ctx, unsigned buffers,
- const union pipe_color_union *color, double depth, unsigned stencil)
- assert_dt
+ const union pipe_color_union *color, double depth,
+ unsigned stencil) assert_dt
{
- struct fd_ringbuffer *ring = ctx->batch->draw;
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
-
- if ((buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
- is_z32(pfb->zsbuf->format))
- return false;
-
- fd5_emit_render_cntl(ctx, true, false);
-
- if (buffers & PIPE_CLEAR_COLOR) {
- for (int i = 0; i < pfb->nr_cbufs; i++) {
- union util_color uc = {0};
-
- if (!pfb->cbufs[i])
- continue;
-
- if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
- continue;
-
- enum pipe_format pfmt = pfb->cbufs[i]->format;
-
- // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
- union pipe_color_union swapped;
- switch (fd5_pipe2swap(pfmt)) {
- case WZYX:
- swapped.ui[0] = color->ui[0];
- swapped.ui[1] = color->ui[1];
- swapped.ui[2] = color->ui[2];
- swapped.ui[3] = color->ui[3];
- break;
- case WXYZ:
- swapped.ui[2] = color->ui[0];
- swapped.ui[1] = color->ui[1];
- swapped.ui[0] = color->ui[2];
- swapped.ui[3] = color->ui[3];
- break;
- case ZYXW:
- swapped.ui[3] = color->ui[0];
- swapped.ui[0] = color->ui[1];
- swapped.ui[1] = color->ui[2];
- swapped.ui[2] = color->ui[3];
- break;
- case XYZW:
- swapped.ui[3] = color->ui[0];
- swapped.ui[2] = color->ui[1];
- swapped.ui[1] = color->ui[2];
- swapped.ui[0] = color->ui[3];
- break;
- }
-
- util_pack_color_union(pfmt, &uc, &swapped);
-
- OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
- OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i));
-
- OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
- OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR |
- A5XX_RB_CLEAR_CNTL_MASK(0xf));
-
- OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 4);
- OUT_RING(ring, uc.ui[0]); /* RB_CLEAR_COLOR_DW0 */
- OUT_RING(ring, uc.ui[1]); /* RB_CLEAR_COLOR_DW1 */
- OUT_RING(ring, uc.ui[2]); /* RB_CLEAR_COLOR_DW2 */
- OUT_RING(ring, uc.ui[3]); /* RB_CLEAR_COLOR_DW3 */
-
- fd5_emit_blit(ctx->batch, ring);
- }
- }
-
- if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
- uint32_t clear =
- util_pack_z_stencil(pfb->zsbuf->format, depth, stencil);
- uint32_t mask = 0;
-
- if (buffers & PIPE_CLEAR_DEPTH)
- mask |= 0x1;
-
- if (buffers & PIPE_CLEAR_STENCIL)
- mask |= 0x2;
-
- OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
- OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_ZS));
-
- OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
- OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR |
- A5XX_RB_CLEAR_CNTL_MASK(mask));
-
- OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
- OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
-
- fd5_emit_blit(ctx->batch, ring);
-
- if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) {
- struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
- if (zsbuf->lrz) {
- zsbuf->lrz_valid = true;
- fd5_clear_lrz(ctx->batch, zsbuf, depth);
- }
- }
- }
-
- /* disable fast clear to not interfere w/ gmem->mem, etc.. */
- OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* RB_CLEAR_CNTL */
-
- return true;
+ struct fd_ringbuffer *ring = ctx->batch->draw;
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+
+ if ((buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
+ is_z32(pfb->zsbuf->format))
+ return false;
+
+ fd5_emit_render_cntl(ctx, true, false);
+
+ if (buffers & PIPE_CLEAR_COLOR) {
+ for (int i = 0; i < pfb->nr_cbufs; i++) {
+ union util_color uc = {0};
+
+ if (!pfb->cbufs[i])
+ continue;
+
+ if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+
+ enum pipe_format pfmt = pfb->cbufs[i]->format;
+
+ // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
+ union pipe_color_union swapped;
+ switch (fd5_pipe2swap(pfmt)) {
+ case WZYX:
+ swapped.ui[0] = color->ui[0];
+ swapped.ui[1] = color->ui[1];
+ swapped.ui[2] = color->ui[2];
+ swapped.ui[3] = color->ui[3];
+ break;
+ case WXYZ:
+ swapped.ui[2] = color->ui[0];
+ swapped.ui[1] = color->ui[1];
+ swapped.ui[0] = color->ui[2];
+ swapped.ui[3] = color->ui[3];
+ break;
+ case ZYXW:
+ swapped.ui[3] = color->ui[0];
+ swapped.ui[0] = color->ui[1];
+ swapped.ui[1] = color->ui[2];
+ swapped.ui[2] = color->ui[3];
+ break;
+ case XYZW:
+ swapped.ui[3] = color->ui[0];
+ swapped.ui[2] = color->ui[1];
+ swapped.ui[1] = color->ui[2];
+ swapped.ui[0] = color->ui[3];
+ break;
+ }
+
+ util_pack_color_union(pfmt, &uc, &swapped);
+
+ OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i));
+
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+ OUT_RING(ring,
+ A5XX_RB_CLEAR_CNTL_FAST_CLEAR | A5XX_RB_CLEAR_CNTL_MASK(0xf));
+
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 4);
+ OUT_RING(ring, uc.ui[0]); /* RB_CLEAR_COLOR_DW0 */
+ OUT_RING(ring, uc.ui[1]); /* RB_CLEAR_COLOR_DW1 */
+ OUT_RING(ring, uc.ui[2]); /* RB_CLEAR_COLOR_DW2 */
+ OUT_RING(ring, uc.ui[3]); /* RB_CLEAR_COLOR_DW3 */
+
+ fd5_emit_blit(ctx->batch, ring);
+ }
+ }
+
+ if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+ uint32_t clear = util_pack_z_stencil(pfb->zsbuf->format, depth, stencil);
+ uint32_t mask = 0;
+
+ if (buffers & PIPE_CLEAR_DEPTH)
+ mask |= 0x1;
+
+ if (buffers & PIPE_CLEAR_STENCIL)
+ mask |= 0x2;
+
+ OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_ZS));
+
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+ OUT_RING(ring,
+ A5XX_RB_CLEAR_CNTL_FAST_CLEAR | A5XX_RB_CLEAR_CNTL_MASK(mask));
+
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
+ OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
+
+ fd5_emit_blit(ctx->batch, ring);
+
+ if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) {
+ struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
+ if (zsbuf->lrz) {
+ zsbuf->lrz_valid = true;
+ fd5_clear_lrz(ctx->batch, zsbuf, depth);
+ }
+ }
+ }
+
+ /* disable fast clear to not interfere w/ gmem->mem, etc.. */
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* RB_CLEAR_CNTL */
+
+ return true;
}
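/* The WZYX/WXYZ/ZYXW/XYZW switch in fd5_clear() is a channel permutation.
 * An equivalent table-driven standalone sketch, where
 * dst[tab[mode][j]] = src[j] reproduces the assignments in the switch:
 */
#include <stdint.h>
#include <stdio.h>

enum swap_mode { WZYX, WXYZ, ZYXW, XYZW };

int
main(void)
{
   static const int tab[4][4] = {
      [WZYX] = {0, 1, 2, 3},
      [WXYZ] = {2, 1, 0, 3},
      [ZYXW] = {3, 0, 1, 2},
      [XYZW] = {3, 2, 1, 0},
   };

   uint32_t src[4] = {0xaa, 0xbb, 0xcc, 0xdd}, dst[4];
   for (int j = 0; j < 4; j++)
      dst[tab[WXYZ][j]] = src[j];

   printf("%02x %02x %02x %02x\n", dst[0], dst[1], dst[2], dst[3]);
   return 0;
}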
void
-fd5_draw_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd5_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->draw_vbo = fd5_draw_vbo;
- ctx->clear = fd5_clear;
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->draw_vbo = fd5_draw_vbo;
+ ctx->clear = fd5_clear;
}
static inline void
fd5_draw(struct fd_batch *batch, struct fd_ringbuffer *ring,
- enum pc_di_primtype primtype,
- enum pc_di_vis_cull_mode vismode,
- enum pc_di_src_sel src_sel, uint32_t count,
- uint32_t instances, enum a4xx_index_size idx_type,
- uint32_t max_indices, uint32_t idx_offset,
- struct pipe_resource *idx_buffer)
+ enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
+ enum pc_di_src_sel src_sel, uint32_t count, uint32_t instances,
+ enum a4xx_index_size idx_type, uint32_t max_indices,
+ uint32_t idx_offset, struct pipe_resource *idx_buffer)
{
- /* for debug after a lock up, write a unique counter value
- * to scratch7 for each draw, to make it easier to match up
- * register dumps to cmdstream. The combination of IB
- * (scratch6) and DRAW is enough to "triangulate" the
- * particular draw that caused lockup.
- */
- emit_marker5(ring, 7);
-
- OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 7 : 3);
- if (vismode == USE_VISIBILITY) {
- /* leave vis mode blank for now, it will be patched up when
- * we know if we are binning or not
- */
- OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
- &batch->draw_patches);
- } else {
- OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
- }
- OUT_RING(ring, instances); /* NumInstances */
- OUT_RING(ring, count); /* NumIndices */
- if (idx_buffer) {
- OUT_RING(ring, 0x0); /* XXX */
- OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
- OUT_RING (ring, max_indices);
- }
-
- emit_marker5(ring, 7);
-
- fd_reset_wfi(batch);
+ /* for debug after a lock up, write a unique counter value
+ * to scratch7 for each draw, to make it easier to match up
+ * register dumps to cmdstream. The combination of IB
+ * (scratch6) and DRAW is enough to "triangulate" the
+ * particular draw that caused lockup.
+ */
+ emit_marker5(ring, 7);
+
+ OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 7 : 3);
+ if (vismode == USE_VISIBILITY) {
+ /* leave vis mode blank for now, it will be patched up when
+ * we know if we are binning or not
+ */
+ OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
+ &batch->draw_patches);
+ } else {
+ OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
+ }
+ OUT_RING(ring, instances); /* NumInstances */
+ OUT_RING(ring, count); /* NumIndices */
+ if (idx_buffer) {
+ OUT_RING(ring, 0x0); /* XXX */
+ OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
+ OUT_RING(ring, max_indices);
+ }
+
+ emit_marker5(ring, 7);
+
+ fd_reset_wfi(batch);
}
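/* A sketch of the deferred-patch idea behind OUT_RINGP() above: the draw
 * dword is recorded along with its location so the visibility mode can be
 * OR'd in later, once the binning decision is known. The struct and the
 * patched bit here are hypothetical, not the real freedreno types.
 */
#include <stdint.h>
#include <stdio.h>

struct cs_patch {
   uint32_t *cs; /* location of the dword in the cmdstream */
   uint32_t val; /* value emitted so far, vis mode left as 0 */
};

int
main(void)
{
   uint32_t cmdstream[1] = {0};
   struct cs_patch p = {&cmdstream[0], 0x12340000};

   *p.cs = p.val;       /* at draw time: vismode still blank */
   *p.cs = p.val | 0x2; /* later: patch in the real vismode  */

   printf("0x%08x\n", cmdstream[0]);
   return 0;
}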
static inline void
fd5_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
- enum pc_di_primtype primtype,
- enum pc_di_vis_cull_mode vismode,
- const struct pipe_draw_info *info,
+ enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
+ const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count *draw,
- unsigned index_offset)
+ const struct pipe_draw_start_count *draw, unsigned index_offset)
{
- struct pipe_resource *idx_buffer = NULL;
- enum a4xx_index_size idx_type;
- enum pc_di_src_sel src_sel;
- uint32_t max_indices, idx_offset;
-
- if (indirect && indirect->buffer) {
- struct fd_resource *ind = fd_resource(indirect->buffer);
-
- emit_marker5(ring, 7);
-
- if (info->index_size) {
- struct pipe_resource *idx = info->index.resource;
- max_indices = idx->width0 / info->index_size;
-
- OUT_PKT7(ring, CP_DRAW_INDX_INDIRECT, 6);
- OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_DMA,
- fd4_size2indextype(info->index_size), 0),
- &batch->draw_patches);
- OUT_RELOC(ring, fd_resource(idx)->bo,
- index_offset, 0, 0);
- OUT_RING(ring, A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(max_indices));
- OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
- } else {
- OUT_PKT7(ring, CP_DRAW_INDIRECT, 3);
- OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0),
- &batch->draw_patches);
- OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
- }
-
- emit_marker5(ring, 7);
- fd_reset_wfi(batch);
-
- return;
- }
-
- if (info->index_size) {
- assert(!info->has_user_indices);
-
- idx_buffer = info->index.resource;
- idx_type = fd4_size2indextype(info->index_size);
- max_indices = idx_buffer->width0 / info->index_size;
- idx_offset = index_offset + draw->start * info->index_size;
- src_sel = DI_SRC_SEL_DMA;
- } else {
- idx_buffer = NULL;
- idx_type = INDEX4_SIZE_32_BIT;
- max_indices = 0;
- idx_offset = 0;
- src_sel = DI_SRC_SEL_AUTO_INDEX;
- }
-
- fd5_draw(batch, ring, primtype, vismode, src_sel,
- draw->count, info->instance_count,
- idx_type, max_indices, idx_offset, idx_buffer);
+ struct pipe_resource *idx_buffer = NULL;
+ enum a4xx_index_size idx_type;
+ enum pc_di_src_sel src_sel;
+ uint32_t max_indices, idx_offset;
+
+ if (indirect && indirect->buffer) {
+ struct fd_resource *ind = fd_resource(indirect->buffer);
+
+ emit_marker5(ring, 7);
+
+ if (info->index_size) {
+ struct pipe_resource *idx = info->index.resource;
+ max_indices = idx->width0 / info->index_size;
+
+ OUT_PKT7(ring, CP_DRAW_INDX_INDIRECT, 6);
+ OUT_RINGP(ring,
+ DRAW4(primtype, DI_SRC_SEL_DMA,
+ fd4_size2indextype(info->index_size), 0),
+ &batch->draw_patches);
+ OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
+ OUT_RING(ring, A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(max_indices));
+ OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
+ } else {
+ OUT_PKT7(ring, CP_DRAW_INDIRECT, 3);
+ OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0),
+ &batch->draw_patches);
+ OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
+ }
+
+ emit_marker5(ring, 7);
+ fd_reset_wfi(batch);
+
+ return;
+ }
+
+ if (info->index_size) {
+ assert(!info->has_user_indices);
+
+ idx_buffer = info->index.resource;
+ idx_type = fd4_size2indextype(info->index_size);
+ max_indices = idx_buffer->width0 / info->index_size;
+ idx_offset = index_offset + draw->start * info->index_size;
+ src_sel = DI_SRC_SEL_DMA;
+ } else {
+ idx_buffer = NULL;
+ idx_type = INDEX4_SIZE_32_BIT;
+ max_indices = 0;
+ idx_offset = 0;
+ src_sel = DI_SRC_SEL_AUTO_INDEX;
+ }
+
+ fd5_draw(batch, ring, primtype, vismode, src_sel, draw->count,
+ info->instance_count, idx_type, max_indices, idx_offset,
+ idx_buffer);
}
#endif /* FD5_DRAW_H_ */
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_helpers.h"
#include "util/format/u_format.h"
+#include "util/u_helpers.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "util/u_viewport.h"
-#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
+#include "freedreno_resource.h"
-#include "fd5_emit.h"
#include "fd5_blend.h"
#include "fd5_blitter.h"
#include "fd5_context.h"
+#include "fd5_emit.h"
+#include "fd5_format.h"
#include "fd5_image.h"
#include "fd5_program.h"
#include "fd5_rasterizer.h"
-#include "fd5_texture.h"
#include "fd5_screen.h"
-#include "fd5_format.h"
+#include "fd5_texture.h"
#include "fd5_zsa.h"
#define emit_const_user fd5_emit_const_user
-#define emit_const_bo fd5_emit_const_bo
+#define emit_const_bo fd5_emit_const_bo
#include "ir3_const.h"
/* regid: base const register
*/
static void
fd5_emit_const_user(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t regid, uint32_t sizedwords,
- const uint32_t *dwords)
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t sizedwords, const uint32_t *dwords)
{
- emit_const_asserts(ring, v, regid, sizedwords);
-
- OUT_PKT7(ring, CP_LOAD_STATE4, 3 + sizedwords);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
- CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4));
- OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
- for (int i = 0; i < sizedwords; i++)
- OUT_RING(ring, ((uint32_t *)dwords)[i]);
+ emit_const_asserts(ring, v, regid, sizedwords);
+
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + sizedwords);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid / 4) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
+ CP_LOAD_STATE4_0_NUM_UNIT(sizedwords / 4));
+ OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ for (int i = 0; i < sizedwords; i++)
+ OUT_RING(ring, ((uint32_t *)dwords)[i]);
}
static void
-fd5_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
- uint32_t regid, uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
+fd5_emit_const_bo(struct fd_ringbuffer *ring,
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
- uint32_t dst_off = regid / 4;
- assert(dst_off % 4 == 0);
- uint32_t num_unit = sizedwords / 4;
- assert(num_unit % 4 == 0);
-
- emit_const_asserts(ring, v, regid, sizedwords);
-
- OUT_PKT7(ring, CP_LOAD_STATE4, 3);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
- CP_LOAD_STATE4_0_NUM_UNIT(num_unit));
- OUT_RELOC(ring, bo, offset,
- CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0);
+ uint32_t dst_off = regid / 4;
+ assert(dst_off % 4 == 0);
+ uint32_t num_unit = sizedwords / 4;
+ assert(num_unit % 4 == 0);
+
+ emit_const_asserts(ring, v, regid, sizedwords);
+
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
+ CP_LOAD_STATE4_0_NUM_UNIT(num_unit));
+ OUT_RELOC(ring, bo, offset, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0);
}
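/* Both const-emit helpers above convert dword-based quantities into the
 * vec4-granular CP_LOAD_STATE4 fields (DST_OFF, NUM_UNIT), hence the
 * regid / 4 and sizedwords / 4 and the 4-alignment asserts. A standalone
 * arithmetic check with hypothetical values:
 */
#include <assert.h>
#include <stdio.h>

int
main(void)
{
   unsigned regid = 16, sizedwords = 32; /* hypothetical values */

   assert(regid % 4 == 0);
   assert(sizedwords % 4 == 0);
   printf("DST_OFF=%u NUM_UNIT=%u\n", regid / 4, sizedwords / 4);
   return 0;
}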
static void
fd5_emit_const_ptrs(struct fd_ringbuffer *ring, gl_shader_stage type,
- uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
+ uint32_t regid, uint32_t num, struct fd_bo **bos,
+ uint32_t *offsets)
{
- uint32_t anum = align(num, 2);
- uint32_t i;
-
- debug_assert((regid % 4) == 0);
-
- OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * anum));
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) |
- CP_LOAD_STATE4_0_NUM_UNIT(anum/2));
- OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
-
- for (i = 0; i < num; i++) {
- if (bos[i]) {
- OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
- } else {
- OUT_RING(ring, 0xbad00000 | (i << 16));
- OUT_RING(ring, 0xbad00000 | (i << 16));
- }
- }
-
- for (; i < anum; i++) {
- OUT_RING(ring, 0xffffffff);
- OUT_RING(ring, 0xffffffff);
- }
+ uint32_t anum = align(num, 2);
+ uint32_t i;
+
+ debug_assert((regid % 4) == 0);
+
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * anum));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid / 4) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) |
+ CP_LOAD_STATE4_0_NUM_UNIT(anum / 2));
+ OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+
+ for (i = 0; i < num; i++) {
+ if (bos[i]) {
+ OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+ } else {
+ OUT_RING(ring, 0xbad00000 | (i << 16));
+ OUT_RING(ring, 0xbad00000 | (i << 16));
+ }
+ }
+
+ for (; i < anum; i++) {
+ OUT_RING(ring, 0xffffffff);
+ OUT_RING(ring, 0xffffffff);
+ }
}
static bool
is_stateobj(struct fd_ringbuffer *ring)
{
- return false;
+ return false;
}
static void
-emit_const_ptrs(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t dst_offset,
- uint32_t num, struct fd_bo **bos, uint32_t *offsets)
+emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
+ uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
+ uint32_t *offsets)
{
- /* TODO inline this */
- assert(dst_offset + num <= v->constlen * 4);
- fd5_emit_const_ptrs(ring, v->type, dst_offset, num, bos, offsets);
+ /* TODO inline this */
+ assert(dst_offset + num <= v->constlen * 4);
+ fd5_emit_const_ptrs(ring, v->type, dst_offset, num, bos, offsets);
}
void
-fd5_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
- struct fd_context *ctx, const struct pipe_grid_info *info)
+fd5_emit_cs_consts(const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring, struct fd_context *ctx,
+ const struct pipe_grid_info *info)
{
- ir3_emit_cs_consts(v, ring, ctx, info);
+ ir3_emit_cs_consts(v, ring, ctx, info);
}
/* Border color layout is diff from a4xx/a5xx.. if it turns out to be
*/
struct PACKED bcolor_entry {
- uint32_t fp32[4];
- uint16_t ui16[4];
- int16_t si16[4];
-
- uint16_t fp16[4];
- uint16_t rgb565;
- uint16_t rgb5a1;
- uint16_t rgba4;
- uint8_t __pad0[2];
- uint8_t ui8[4];
- int8_t si8[4];
- uint32_t rgb10a2;
- uint32_t z24; /* also s8? */
-
- uint16_t srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
- uint8_t __pad1[24];
+ uint32_t fp32[4];
+ uint16_t ui16[4];
+ int16_t si16[4];
+
+ uint16_t fp16[4];
+ uint16_t rgb565;
+ uint16_t rgb5a1;
+ uint16_t rgba4;
+ uint8_t __pad0[2];
+ uint8_t ui8[4];
+ int8_t si8[4];
+ uint32_t rgb10a2;
+ uint32_t z24; /* also s8? */
+
+ uint16_t
+ srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
+ uint8_t __pad1[24];
};
-#define FD5_BORDER_COLOR_SIZE 0x60
-#define FD5_BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * FD5_BORDER_COLOR_SIZE)
+#define FD5_BORDER_COLOR_SIZE 0x60
+#define FD5_BORDER_COLOR_UPLOAD_SIZE \
+ (2 * PIPE_MAX_SAMPLERS * FD5_BORDER_COLOR_SIZE)
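/* A quick arithmetic check of the macro above: each bcolor_entry is 0x60
 * (96) bytes, and the upload buffer holds a VS set and an FS set of
 * PIPE_MAX_SAMPLERS entries each. Assuming gallium's PIPE_MAX_SAMPLERS of
 * 32, that is 2 * 32 * 96 = 6144 bytes.
 */
#include <stdio.h>

#define PIPE_MAX_SAMPLERS 32 /* assumption: current gallium value */
#define FD5_BORDER_COLOR_SIZE 0x60
#define FD5_BORDER_COLOR_UPLOAD_SIZE \
   (2 * PIPE_MAX_SAMPLERS * FD5_BORDER_COLOR_SIZE)

int
main(void)
{
   printf("%d bytes\n", FD5_BORDER_COLOR_UPLOAD_SIZE); /* 6144 */
   return 0;
}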
static void
-setup_border_colors(struct fd_texture_stateobj *tex, struct bcolor_entry *entries)
+setup_border_colors(struct fd_texture_stateobj *tex,
+ struct bcolor_entry *entries)
{
- unsigned i, j;
- STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE);
-
- for (i = 0; i < tex->num_samplers; i++) {
- struct bcolor_entry *e = &entries[i];
- struct pipe_sampler_state *sampler = tex->samplers[i];
- union pipe_color_union *bc;
-
- if (!sampler)
- continue;
-
- bc = &sampler->border_color;
-
- /*
- * XXX HACK ALERT XXX
- *
- * The border colors need to be swizzled in a particular
- * format-dependent order. Even though samplers don't know about
- * formats, we can assume that with a GL state tracker, there's a
- * 1:1 correspondence between sampler and texture. Take advantage
- * of that knowledge.
- */
- if ((i >= tex->num_textures) || !tex->textures[i])
- continue;
-
- enum pipe_format format = tex->textures[i]->format;
- const struct util_format_description *desc =
- util_format_description(format);
-
- e->rgb565 = 0;
- e->rgb5a1 = 0;
- e->rgba4 = 0;
- e->rgb10a2 = 0;
- e->z24 = 0;
-
- for (j = 0; j < 4; j++) {
- int c = desc->swizzle[j];
- int cd = c;
-
- /*
- * HACK: for PIPE_FORMAT_X24S8_UINT we end up w/ the
- * stencil border color value in bc->ui[0] but according
- * to desc->swizzle and desc->channel, the .x component
- * is NONE and the stencil value is in the y component.
- * Meanwhile the hardware wants this in the .x componetn.
- */
- if ((format == PIPE_FORMAT_X24S8_UINT) ||
- (format == PIPE_FORMAT_X32_S8X24_UINT)) {
- if (j == 0) {
- c = 1;
- cd = 0;
- } else {
- continue;
- }
- }
-
- if (c >= 4)
- continue;
-
- if (desc->channel[c].pure_integer) {
- uint16_t clamped;
- switch (desc->channel[c].size) {
- case 2:
- assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
- clamped = CLAMP(bc->ui[j], 0, 0x3);
- break;
- case 8:
- if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
- clamped = CLAMP(bc->i[j], -128, 127);
- else
- clamped = CLAMP(bc->ui[j], 0, 255);
- break;
- case 10:
- assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
- clamped = CLAMP(bc->ui[j], 0, 0x3ff);
- break;
- case 16:
- if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
- clamped = CLAMP(bc->i[j], -32768, 32767);
- else
- clamped = CLAMP(bc->ui[j], 0, 65535);
- break;
- default:
- assert(!"Unexpected bit size");
- case 32:
- clamped = 0;
- break;
- }
- e->fp32[cd] = bc->ui[j];
- e->fp16[cd] = clamped;
- } else {
- float f = bc->f[j];
- float f_u = CLAMP(f, 0, 1);
- float f_s = CLAMP(f, -1, 1);
-
- e->fp32[c] = fui(f);
- e->fp16[c] = _mesa_float_to_half(f);
- e->srgb[c] = _mesa_float_to_half(f_u);
- e->ui16[c] = f_u * 0xffff;
- e->si16[c] = f_s * 0x7fff;
- e->ui8[c] = f_u * 0xff;
- e->si8[c] = f_s * 0x7f;
- if (c == 1)
- e->rgb565 |= (int)(f_u * 0x3f) << 5;
- else if (c < 3)
- e->rgb565 |= (int)(f_u * 0x1f) << (c ? 11 : 0);
- if (c == 3)
- e->rgb5a1 |= (f_u > 0.5) ? 0x8000 : 0;
- else
- e->rgb5a1 |= (int)(f_u * 0x1f) << (c * 5);
- if (c == 3)
- e->rgb10a2 |= (int)(f_u * 0x3) << 30;
- else
- e->rgb10a2 |= (int)(f_u * 0x3ff) << (c * 10);
- e->rgba4 |= (int)(f_u * 0xf) << (c * 4);
- if (c == 0)
- e->z24 = f_u * 0xffffff;
- }
- }
+ unsigned i, j;
+ STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE);
+
+ for (i = 0; i < tex->num_samplers; i++) {
+ struct bcolor_entry *e = &entries[i];
+ struct pipe_sampler_state *sampler = tex->samplers[i];
+ union pipe_color_union *bc;
+
+ if (!sampler)
+ continue;
+
+ bc = &sampler->border_color;
+
+ /*
+ * XXX HACK ALERT XXX
+ *
+ * The border colors need to be swizzled in a particular
+ * format-dependent order. Even though samplers don't know about
+ * formats, we can assume that with a GL state tracker, there's a
+ * 1:1 correspondence between sampler and texture. Take advantage
+ * of that knowledge.
+ */
+ if ((i >= tex->num_textures) || !tex->textures[i])
+ continue;
+
+ enum pipe_format format = tex->textures[i]->format;
+ const struct util_format_description *desc =
+ util_format_description(format);
+
+ e->rgb565 = 0;
+ e->rgb5a1 = 0;
+ e->rgba4 = 0;
+ e->rgb10a2 = 0;
+ e->z24 = 0;
+
+ for (j = 0; j < 4; j++) {
+ int c = desc->swizzle[j];
+ int cd = c;
+
+ /*
+ * HACK: for PIPE_FORMAT_X24S8_UINT we end up w/ the
+ * stencil border color value in bc->ui[0] but according
+ * to desc->swizzle and desc->channel, the .x component
+ * is NONE and the stencil value is in the y component.
+ * Meanwhile the hardware wants this in the .x component.
+ */
+ if ((format == PIPE_FORMAT_X24S8_UINT) ||
+ (format == PIPE_FORMAT_X32_S8X24_UINT)) {
+ if (j == 0) {
+ c = 1;
+ cd = 0;
+ } else {
+ continue;
+ }
+ }
+
+ if (c >= 4)
+ continue;
+
+ if (desc->channel[c].pure_integer) {
+ uint16_t clamped;
+ switch (desc->channel[c].size) {
+ case 2:
+ assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
+ clamped = CLAMP(bc->ui[j], 0, 0x3);
+ break;
+ case 8:
+ if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
+ clamped = CLAMP(bc->i[j], -128, 127);
+ else
+ clamped = CLAMP(bc->ui[j], 0, 255);
+ break;
+ case 10:
+ assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
+ clamped = CLAMP(bc->ui[j], 0, 0x3ff);
+ break;
+ case 16:
+ if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
+ clamped = CLAMP(bc->i[j], -32768, 32767);
+ else
+ clamped = CLAMP(bc->ui[j], 0, 65535);
+ break;
+ default:
+ assert(!"Unexpected bit size");
+ case 32:
+ clamped = 0;
+ break;
+ }
+ e->fp32[cd] = bc->ui[j];
+ e->fp16[cd] = clamped;
+ } else {
+ float f = bc->f[j];
+ float f_u = CLAMP(f, 0, 1);
+ float f_s = CLAMP(f, -1, 1);
+
+ e->fp32[c] = fui(f);
+ e->fp16[c] = _mesa_float_to_half(f);
+ e->srgb[c] = _mesa_float_to_half(f_u);
+ e->ui16[c] = f_u * 0xffff;
+ e->si16[c] = f_s * 0x7fff;
+ e->ui8[c] = f_u * 0xff;
+ e->si8[c] = f_s * 0x7f;
+ if (c == 1)
+ e->rgb565 |= (int)(f_u * 0x3f) << 5;
+ else if (c < 3)
+ e->rgb565 |= (int)(f_u * 0x1f) << (c ? 11 : 0);
+ if (c == 3)
+ e->rgb5a1 |= (f_u > 0.5) ? 0x8000 : 0;
+ else
+ e->rgb5a1 |= (int)(f_u * 0x1f) << (c * 5);
+ if (c == 3)
+ e->rgb10a2 |= (int)(f_u * 0x3) << 30;
+ else
+ e->rgb10a2 |= (int)(f_u * 0x3ff) << (c * 10);
+ e->rgba4 |= (int)(f_u * 0xf) << (c * 4);
+ if (c == 0)
+ e->z24 = f_u * 0xffffff;
+ }
+ }
#ifdef DEBUG
- memset(&e->__pad0, 0, sizeof(e->__pad0));
- memset(&e->__pad1, 0, sizeof(e->__pad1));
+ memset(&e->__pad0, 0, sizeof(e->__pad0));
+ memset(&e->__pad1, 0, sizeof(e->__pad1));
#endif
- }
+ }
}
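/* A standalone sketch of the small-format packing done in
 * setup_border_colors(): a clamped border-color channel f_u is scaled to
 * the channel's unorm range and shifted into place. Shown for rgb565 with
 * all channels at 1.0 (expect 0xffff).
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   float f_u = 1.0f;
   uint16_t rgb565 = 0;

   /* channel 0: 5 bits at bit 0, channel 1: 6 bits at bit 5,
    * channel 2: 5 bits at bit 11, matching the driver's expressions */
   rgb565 |= (int)(f_u * 0x1f) << 0;
   rgb565 |= (int)(f_u * 0x3f) << 5;
   rgb565 |= (int)(f_u * 0x1f) << 11;

   printf("0x%04x\n", rgb565); /* 0xffff */
   return 0;
}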
static void
-emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring)
- assert_dt
+emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) assert_dt
{
- struct fd5_context *fd5_ctx = fd5_context(ctx);
- struct bcolor_entry *entries;
- unsigned off;
- void *ptr;
+ struct fd5_context *fd5_ctx = fd5_context(ctx);
+ struct bcolor_entry *entries;
+ unsigned off;
+ void *ptr;
- STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE);
+ STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE);
- u_upload_alloc(fd5_ctx->border_color_uploader,
- 0, FD5_BORDER_COLOR_UPLOAD_SIZE,
- FD5_BORDER_COLOR_UPLOAD_SIZE, &off,
- &fd5_ctx->border_color_buf,
- &ptr);
+ u_upload_alloc(fd5_ctx->border_color_uploader, 0,
+ FD5_BORDER_COLOR_UPLOAD_SIZE, FD5_BORDER_COLOR_UPLOAD_SIZE,
+ &off, &fd5_ctx->border_color_buf, &ptr);
- entries = ptr;
+ entries = ptr;
- setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0]);
- setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT],
- &entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers]);
+ setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0]);
+ setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT],
+ &entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers]);
- OUT_PKT4(ring, REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
- OUT_RELOC(ring, fd_resource(fd5_ctx->border_color_buf)->bo, off, 0, 0);
+ OUT_PKT4(ring, REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
+ OUT_RELOC(ring, fd_resource(fd5_ctx->border_color_buf)->bo, off, 0, 0);
- u_upload_unmap(fd5_ctx->border_color_uploader);
+ u_upload_unmap(fd5_ctx->border_color_uploader);
}
static bool
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum a4xx_state_block sb, struct fd_texture_stateobj *tex)
- assert_dt
+ enum a4xx_state_block sb,
+ struct fd_texture_stateobj *tex) assert_dt
{
- bool needs_border = false;
- unsigned bcolor_offset = (sb == SB4_FS_TEX) ? ctx->tex[PIPE_SHADER_VERTEX].num_samplers : 0;
- unsigned i;
-
- if (tex->num_samplers > 0) {
- /* output sampler state: */
- OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * tex->num_samplers));
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE4_0_NUM_UNIT(tex->num_samplers));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
- for (i = 0; i < tex->num_samplers; i++) {
- static const struct fd5_sampler_stateobj dummy_sampler = {};
- const struct fd5_sampler_stateobj *sampler = tex->samplers[i] ?
- fd5_sampler_stateobj(tex->samplers[i]) :
- &dummy_sampler;
- OUT_RING(ring, sampler->texsamp0);
- OUT_RING(ring, sampler->texsamp1);
- OUT_RING(ring, sampler->texsamp2 |
- A5XX_TEX_SAMP_2_BCOLOR_OFFSET(bcolor_offset));
- OUT_RING(ring, sampler->texsamp3);
-
- needs_border |= sampler->needs_border;
- }
- }
-
- if (tex->num_textures > 0) {
- unsigned num_textures = tex->num_textures;
-
- /* emit texture state: */
- OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (12 * num_textures));
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE4_0_NUM_UNIT(num_textures));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
- for (i = 0; i < tex->num_textures; i++) {
- static const struct fd5_pipe_sampler_view dummy_view = {};
- const struct fd5_pipe_sampler_view *view = tex->textures[i] ?
- fd5_pipe_sampler_view(tex->textures[i]) :
- &dummy_view;
- enum a5xx_tile_mode tile_mode = TILE5_LINEAR;
-
- if (view->base.texture)
- tile_mode = fd_resource(view->base.texture)->layout.tile_mode;
-
- OUT_RING(ring, view->texconst0 |
- A5XX_TEX_CONST_0_TILE_MODE(tile_mode));
- OUT_RING(ring, view->texconst1);
- OUT_RING(ring, view->texconst2);
- OUT_RING(ring, view->texconst3);
- if (view->base.texture) {
- struct fd_resource *rsc = fd_resource(view->base.texture);
- if (view->base.format == PIPE_FORMAT_X32_S8X24_UINT)
- rsc = rsc->stencil;
- OUT_RELOC(ring, rsc->bo, view->offset,
- (uint64_t)view->texconst5 << 32, 0);
- } else {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, view->texconst5);
- }
- OUT_RING(ring, view->texconst6);
- OUT_RING(ring, view->texconst7);
- OUT_RING(ring, view->texconst8);
- OUT_RING(ring, view->texconst9);
- OUT_RING(ring, view->texconst10);
- OUT_RING(ring, view->texconst11);
- }
- }
-
- return needs_border;
+ bool needs_border = false;
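+   /* border colors for FS samplers are stored after the VS ones in the
+    * shared bcolor buffer (see the setup_border_colors() calls above),
+    * hence the offset for SB4_FS_TEX:
+    */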
+ unsigned bcolor_offset =
+ (sb == SB4_FS_TEX) ? ctx->tex[PIPE_SHADER_VERTEX].num_samplers : 0;
+ unsigned i;
+
+ if (tex->num_samplers > 0) {
+ /* output sampler state: */
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * tex->num_samplers));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(tex->num_samplers));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ for (i = 0; i < tex->num_samplers; i++) {
+ static const struct fd5_sampler_stateobj dummy_sampler = {};
+ const struct fd5_sampler_stateobj *sampler =
+ tex->samplers[i] ? fd5_sampler_stateobj(tex->samplers[i])
+ : &dummy_sampler;
+ OUT_RING(ring, sampler->texsamp0);
+ OUT_RING(ring, sampler->texsamp1);
+ OUT_RING(ring, sampler->texsamp2 |
+ A5XX_TEX_SAMP_2_BCOLOR_OFFSET(bcolor_offset));
+ OUT_RING(ring, sampler->texsamp3);
+
+ needs_border |= sampler->needs_border;
+ }
+ }
+
+ if (tex->num_textures > 0) {
+ unsigned num_textures = tex->num_textures;
+
+ /* emit texture state: */
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (12 * num_textures));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(num_textures));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ for (i = 0; i < tex->num_textures; i++) {
+ static const struct fd5_pipe_sampler_view dummy_view = {};
+ const struct fd5_pipe_sampler_view *view =
+ tex->textures[i] ? fd5_pipe_sampler_view(tex->textures[i])
+ : &dummy_view;
+ enum a5xx_tile_mode tile_mode = TILE5_LINEAR;
+
+ if (view->base.texture)
+ tile_mode = fd_resource(view->base.texture)->layout.tile_mode;
+
+ OUT_RING(ring,
+ view->texconst0 | A5XX_TEX_CONST_0_TILE_MODE(tile_mode));
+ OUT_RING(ring, view->texconst1);
+ OUT_RING(ring, view->texconst2);
+ OUT_RING(ring, view->texconst3);
+ if (view->base.texture) {
+ struct fd_resource *rsc = fd_resource(view->base.texture);
+ if (view->base.format == PIPE_FORMAT_X32_S8X24_UINT)
+ rsc = rsc->stencil;
+ OUT_RELOC(ring, rsc->bo, view->offset,
+ (uint64_t)view->texconst5 << 32, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, view->texconst5);
+ }
+ OUT_RING(ring, view->texconst6);
+ OUT_RING(ring, view->texconst7);
+ OUT_RING(ring, view->texconst8);
+ OUT_RING(ring, view->texconst9);
+ OUT_RING(ring, view->texconst10);
+ OUT_RING(ring, view->texconst11);
+ }
+ }
+
+ return needs_border;
}
static void
emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so,
- const struct ir3_shader_variant *v)
+ enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so,
+ const struct ir3_shader_variant *v)
{
- unsigned count = util_last_bit(so->enabled_mask);
-
- for (unsigned i = 0; i < count; i++) {
- OUT_PKT7(ring, CP_LOAD_STATE4, 5);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE4_0_NUM_UNIT(1));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
-
- struct pipe_shader_buffer *buf = &so->sb[i];
- unsigned sz = buf->buffer_size;
-
- /* width is in dwords, overflows into height: */
- sz /= 4;
-
- OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
- OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
-
- OUT_PKT7(ring, CP_LOAD_STATE4, 5);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE4_0_NUM_UNIT(1));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
-
- if (buf->buffer) {
- struct fd_resource *rsc = fd_resource(buf->buffer);
- OUT_RELOC(ring, rsc->bo, buf->buffer_offset, 0, 0);
- } else {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
- }
+ unsigned count = util_last_bit(so->enabled_mask);
+
+ for (unsigned i = 0; i < count; i++) {
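+      /* each SSBO takes a pair of CP_LOAD_STATE4 packets: state type 1
+       * carries the buffer size, state type 2 the buffer address:
+       */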
+ OUT_PKT7(ring, CP_LOAD_STATE4, 5);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(1));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+
+ struct pipe_shader_buffer *buf = &so->sb[i];
+ unsigned sz = buf->buffer_size;
+
+ /* width is in dwords, overflows into height: */
+ sz /= 4;
+
+ OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
+ OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
+
+ OUT_PKT7(ring, CP_LOAD_STATE4, 5);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(1));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+
+ if (buf->buffer) {
+ struct fd_resource *rsc = fd_resource(buf->buffer);
+ OUT_RELOC(ring, rsc->bo, buf->buffer_offset, 0, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ }
}
void
fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
{
- int32_t i, j;
- const struct fd_vertex_state *vtx = emit->vtx;
- const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
-
- for (i = 0, j = 0; i <= vp->inputs_count; i++) {
- if (vp->inputs[i].sysval)
- continue;
- if (vp->inputs[i].compmask) {
- struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
- const struct pipe_vertex_buffer *vb =
- &vtx->vertexbuf.vb[elem->vertex_buffer_index];
- struct fd_resource *rsc = fd_resource(vb->buffer.resource);
- enum pipe_format pfmt = elem->src_format;
- enum a5xx_vtx_fmt fmt = fd5_pipe2vtx(pfmt);
- bool isint = util_format_is_pure_integer(pfmt);
- uint32_t off = vb->buffer_offset + elem->src_offset;
- uint32_t size = fd_bo_size(rsc->bo) - off;
- debug_assert(fmt != VFMT5_NONE);
+ int32_t i, j;
+ const struct fd_vertex_state *vtx = emit->vtx;
+ const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
+
+ for (i = 0, j = 0; i <= vp->inputs_count; i++) {
+ if (vp->inputs[i].sysval)
+ continue;
+ if (vp->inputs[i].compmask) {
+ struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
+ const struct pipe_vertex_buffer *vb =
+ &vtx->vertexbuf.vb[elem->vertex_buffer_index];
+ struct fd_resource *rsc = fd_resource(vb->buffer.resource);
+ enum pipe_format pfmt = elem->src_format;
+ enum a5xx_vtx_fmt fmt = fd5_pipe2vtx(pfmt);
+ bool isint = util_format_is_pure_integer(pfmt);
+ uint32_t off = vb->buffer_offset + elem->src_offset;
+ uint32_t size = fd_bo_size(rsc->bo) - off;
+ debug_assert(fmt != VFMT5_NONE);
#ifdef DEBUG
- /* see dEQP-GLES31.stress.vertex_attribute_binding.buffer_bounds.bind_vertex_buffer_offset_near_wrap_10
- */
- if (off > fd_bo_size(rsc->bo))
- continue;
+ /* see
+ * dEQP-GLES31.stress.vertex_attribute_binding.buffer_bounds.bind_vertex_buffer_offset_near_wrap_10
+ */
+ if (off > fd_bo_size(rsc->bo))
+ continue;
#endif
- OUT_PKT4(ring, REG_A5XX_VFD_FETCH(j), 4);
- OUT_RELOC(ring, rsc->bo, off, 0, 0);
- OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */
- OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */
-
- OUT_PKT4(ring, REG_A5XX_VFD_DECODE(j), 2);
- OUT_RING(ring, A5XX_VFD_DECODE_INSTR_IDX(j) |
- A5XX_VFD_DECODE_INSTR_FORMAT(fmt) |
- COND(elem->instance_divisor, A5XX_VFD_DECODE_INSTR_INSTANCED) |
- A5XX_VFD_DECODE_INSTR_SWAP(fd5_pipe2swap(pfmt)) |
- A5XX_VFD_DECODE_INSTR_UNK30 |
- COND(!isint, A5XX_VFD_DECODE_INSTR_FLOAT));
- OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */
-
- OUT_PKT4(ring, REG_A5XX_VFD_DEST_CNTL(j), 1);
- OUT_RING(ring, A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) |
- A5XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid));
-
- j++;
- }
- }
-
- OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_0, 1);
- OUT_RING(ring, A5XX_VFD_CONTROL_0_VTXCNT(j));
+ OUT_PKT4(ring, REG_A5XX_VFD_FETCH(j), 4);
+ OUT_RELOC(ring, rsc->bo, off, 0, 0);
+ OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */
+ OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */
+
+ OUT_PKT4(ring, REG_A5XX_VFD_DECODE(j), 2);
+ OUT_RING(
+ ring,
+ A5XX_VFD_DECODE_INSTR_IDX(j) | A5XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+ COND(elem->instance_divisor, A5XX_VFD_DECODE_INSTR_INSTANCED) |
+ A5XX_VFD_DECODE_INSTR_SWAP(fd5_pipe2swap(pfmt)) |
+ A5XX_VFD_DECODE_INSTR_UNK30 |
+ COND(!isint, A5XX_VFD_DECODE_INSTR_FLOAT));
+ OUT_RING(
+ ring,
+ MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */
+
+ OUT_PKT4(ring, REG_A5XX_VFD_DEST_CNTL(j), 1);
+ OUT_RING(ring,
+ A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) |
+ A5XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid));
+
+ j++;
+ }
+ }
+
+ OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_0, 1);
+ OUT_RING(ring, A5XX_VFD_CONTROL_0_VTXCNT(j));
}
void
fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd5_emit *emit)
+ struct fd5_emit *emit)
{
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
- const struct ir3_shader_variant *fp = fd5_emit_get_fp(emit);
- const enum fd_dirty_3d_state dirty = emit->dirty;
- bool needs_border = false;
-
- emit_marker5(ring, 5);
-
- if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->binning_pass) {
- unsigned char mrt_comp[A5XX_MAX_RENDER_TARGETS] = {0};
-
- for (unsigned i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
- mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
- }
-
- OUT_PKT4(ring, REG_A5XX_RB_RENDER_COMPONENTS, 1);
- OUT_RING(ring, A5XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
- A5XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
- A5XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
- A5XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
- A5XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
- A5XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
- A5XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
- A5XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
- }
-
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
- struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
- uint32_t rb_alpha_control = zsa->rb_alpha_control;
-
- if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
- rb_alpha_control &= ~A5XX_RB_ALPHA_CONTROL_ALPHA_TEST;
-
- OUT_PKT4(ring, REG_A5XX_RB_ALPHA_CONTROL, 1);
- OUT_RING(ring, rb_alpha_control);
-
- OUT_PKT4(ring, REG_A5XX_RB_STENCIL_CONTROL, 1);
- OUT_RING(ring, zsa->rb_stencil_control);
- }
-
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG)) {
- struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
- struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
-
- if (pfb->zsbuf) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
- uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl;
-
- if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid)
- gras_lrz_cntl = 0;
- else if (emit->binning_pass && blend->lrz_write && zsa->lrz_write)
- gras_lrz_cntl |= A5XX_GRAS_LRZ_CNTL_LRZ_WRITE;
-
- OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
- OUT_RING(ring, gras_lrz_cntl);
- }
- }
-
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
- struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
- struct pipe_stencil_ref *sr = &ctx->stencil_ref;
-
- OUT_PKT4(ring, REG_A5XX_RB_STENCILREFMASK, 2);
- OUT_RING(ring, zsa->rb_stencilrefmask |
- A5XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
- OUT_RING(ring, zsa->rb_stencilrefmask_bf |
- A5XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
- }
-
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
- struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
- bool fragz = fp->no_earlyz || fp->has_kill || zsa->base.alpha_enabled || fp->writes_pos;
-
- OUT_PKT4(ring, REG_A5XX_RB_DEPTH_CNTL, 1);
- OUT_RING(ring, zsa->rb_depth_cntl);
-
- OUT_PKT4(ring, REG_A5XX_RB_DEPTH_PLANE_CNTL, 1);
- OUT_RING(ring, COND(fragz, A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) |
- COND(fragz && fp->fragcoord_compmask != 0,
- A5XX_RB_DEPTH_PLANE_CNTL_UNK1));
-
- OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL, 1);
- OUT_RING(ring, COND(fragz, A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) |
- COND(fragz && fp->fragcoord_compmask != 0,
- A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1));
- }
-
- /* NOTE: scissor enabled bit is part of rasterizer state: */
- if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
- struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
-
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
- OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->minx) |
- A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->miny));
- OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->maxx - 1) |
- A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->maxy - 1));
-
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
- OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) |
- A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny));
- OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) |
- A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1));
-
- ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
- ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
- ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
- ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
- }
-
- if (dirty & FD_DIRTY_VIEWPORT) {
- fd_wfi(ctx->batch, ring);
- OUT_PKT4(ring, REG_A5XX_GRAS_CL_VPORT_XOFFSET_0, 6);
- OUT_RING(ring, A5XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
- OUT_RING(ring, A5XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
- OUT_RING(ring, A5XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
- OUT_RING(ring, A5XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
- OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
- OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
- }
-
- if (dirty & FD_DIRTY_PROG)
- fd5_program_emit(ctx, ring, emit);
-
- if (dirty & FD_DIRTY_RASTERIZER) {
- struct fd5_rasterizer_stateobj *rasterizer =
- fd5_rasterizer_stateobj(ctx->rasterizer);
-
- OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
- OUT_RING(ring, rasterizer->gras_su_cntl |
- COND(pfb->samples > 1, A5XX_GRAS_SU_CNTL_MSAA_ENABLE));
-
- OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2);
- OUT_RING(ring, rasterizer->gras_su_point_minmax);
- OUT_RING(ring, rasterizer->gras_su_point_size);
-
- OUT_PKT4(ring, REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
- OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
- OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
- OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp);
-
- OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1);
- OUT_RING(ring, rasterizer->pc_raster_cntl);
-
- OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
- OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
- }
-
- /* note: must come after program emit.. because there is some overlap
- * in registers, ex. PC_PRIMITIVE_CNTL and we rely on some cached
- * values from fd5_program_emit() to avoid having to re-emit the prog
- * every time rast state changes.
- *
- * Since the primitive restart state is not part of a tracked object, we
- * re-emit this register every time.
- */
- if (emit->info && ctx->rasterizer) {
- struct fd5_rasterizer_stateobj *rasterizer =
- fd5_rasterizer_stateobj(ctx->rasterizer);
- unsigned max_loc = fd5_context(ctx)->max_loc;
-
- OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1);
- OUT_RING(ring, rasterizer->pc_primitive_cntl |
- A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(max_loc) |
- COND(emit->info->primitive_restart && emit->info->index_size,
- A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART));
- }
-
- if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
- uint32_t posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
- unsigned nr = pfb->nr_cbufs;
-
- if (emit->binning_pass)
- nr = 0;
- else if (ctx->rasterizer->rasterizer_discard)
- nr = 0;
-
- OUT_PKT4(ring, REG_A5XX_RB_FS_OUTPUT_CNTL, 1);
- OUT_RING(ring, A5XX_RB_FS_OUTPUT_CNTL_MRT(nr) |
- COND(fp->writes_pos, A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z));
-
- OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_CNTL, 1);
- OUT_RING(ring, A5XX_SP_FS_OUTPUT_CNTL_MRT(nr) |
- A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(posz_regid) |
- A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(regid(63, 0)));
- }
-
- ir3_emit_vs_consts(vp, ring, ctx, emit->info, emit->indirect, emit->draw);
- if (!emit->binning_pass)
- ir3_emit_fs_consts(fp, ring, ctx);
-
- struct ir3_stream_output_info *info = &vp->shader->stream_output;
- if (info->num_outputs) {
- struct fd_streamout_stateobj *so = &ctx->streamout;
-
- for (unsigned i = 0; i < so->num_targets; i++) {
- struct fd_stream_output_target *target = fd_stream_output_target(so->targets[i]);
-
- if (!target)
- continue;
-
- OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(i), 3);
- /* VPC_SO[i].BUFFER_BASE_LO: */
- OUT_RELOC(ring, fd_resource(target->base.buffer)->bo, 0, 0, 0);
- OUT_RING(ring, target->base.buffer_size + target->base.buffer_offset);
-
- struct fd_bo *offset_bo = fd_resource(target->offset_buf)->bo;
-
- if (so->reset & (1 << i)) {
- assert(so->offsets[i] == 0);
-
- OUT_PKT7(ring, CP_MEM_WRITE, 3);
- OUT_RELOC(ring, offset_bo, 0, 0, 0);
- OUT_RING(ring, target->base.buffer_offset);
-
- OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(i), 1);
- OUT_RING(ring, target->base.buffer_offset);
- } else {
- OUT_PKT7(ring, CP_MEM_TO_REG, 3);
- OUT_RING(ring, CP_MEM_TO_REG_0_REG(REG_A5XX_VPC_SO_BUFFER_OFFSET(i)) |
- CP_MEM_TO_REG_0_SHIFT_BY_2 | CP_MEM_TO_REG_0_UNK31 |
- CP_MEM_TO_REG_0_CNT(0));
- OUT_RELOC(ring, offset_bo, 0, 0, 0);
- }
-
- // After a draw HW would write the new offset to offset_bo
- OUT_PKT4(ring, REG_A5XX_VPC_SO_FLUSH_BASE_LO(i), 2);
- OUT_RELOC(ring, offset_bo, 0, 0, 0);
-
- so->reset &= ~(1 << i);
-
- emit->streamout_mask |= (1 << i);
- }
- }
-
- if (dirty & FD_DIRTY_BLEND) {
- struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
- uint32_t i;
-
- for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
- enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
- bool is_int = util_format_is_pure_integer(format);
- bool has_alpha = util_format_has_alpha(format);
- uint32_t control = blend->rb_mrt[i].control;
-
- if (is_int) {
- control &= A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
- control |= A5XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
- }
-
- if (!has_alpha) {
- control &= ~A5XX_RB_MRT_CONTROL_BLEND2;
- }
-
- OUT_PKT4(ring, REG_A5XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, control);
-
- OUT_PKT4(ring, REG_A5XX_RB_MRT_BLEND_CONTROL(i), 1);
- OUT_RING(ring, blend->rb_mrt[i].blend_control);
- }
-
- OUT_PKT4(ring, REG_A5XX_SP_BLEND_CNTL, 1);
- OUT_RING(ring, blend->sp_blend_cntl);
- }
-
- if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK)) {
- struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
-
- OUT_PKT4(ring, REG_A5XX_RB_BLEND_CNTL, 1);
- OUT_RING(ring, blend->rb_blend_cntl |
- A5XX_RB_BLEND_CNTL_SAMPLE_MASK(ctx->sample_mask));
- }
-
- if (dirty & FD_DIRTY_BLEND_COLOR) {
- struct pipe_blend_color *bcolor = &ctx->blend_color;
-
- OUT_PKT4(ring, REG_A5XX_RB_BLEND_RED, 8);
- OUT_RING(ring, A5XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) |
- A5XX_RB_BLEND_RED_UINT(bcolor->color[0] * 0xff) |
- A5XX_RB_BLEND_RED_SINT(bcolor->color[0] * 0x7f));
- OUT_RING(ring, A5XX_RB_BLEND_RED_F32(bcolor->color[0]));
- OUT_RING(ring, A5XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) |
- A5XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 0xff) |
- A5XX_RB_BLEND_GREEN_SINT(bcolor->color[1] * 0x7f));
- OUT_RING(ring, A5XX_RB_BLEND_RED_F32(bcolor->color[1]));
- OUT_RING(ring, A5XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) |
- A5XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 0xff) |
- A5XX_RB_BLEND_BLUE_SINT(bcolor->color[2] * 0x7f));
- OUT_RING(ring, A5XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
- OUT_RING(ring, A5XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) |
- A5XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 0xff) |
- A5XX_RB_BLEND_ALPHA_SINT(bcolor->color[3] * 0x7f));
- OUT_RING(ring, A5XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
- }
-
- if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) {
- needs_border |= emit_textures(ctx, ring, SB4_VS_TEX,
- &ctx->tex[PIPE_SHADER_VERTEX]);
- OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1);
- OUT_RING(ring, ctx->tex[PIPE_SHADER_VERTEX].num_textures);
- }
-
- if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) {
- needs_border |= emit_textures(ctx, ring, SB4_FS_TEX,
- &ctx->tex[PIPE_SHADER_FRAGMENT]);
- }
-
- OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
- OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask ?
- ~0 : ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
-
- OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
-
- if (needs_border)
- emit_border_color(ctx, ring);
-
- if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
- emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT], fp);
-
- if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
- fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT, fp);
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
+ const struct ir3_shader_variant *fp = fd5_emit_get_fp(emit);
+ const enum fd_dirty_3d_state dirty = emit->dirty;
+ bool needs_border = false;
+
+ emit_marker5(ring, 5);
+
+ if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->binning_pass) {
+ unsigned char mrt_comp[A5XX_MAX_RENDER_TARGETS] = {0};
+
+ for (unsigned i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
+ }
+
+ OUT_PKT4(ring, REG_A5XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A5XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A5XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A5XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A5XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A5XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A5XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A5XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A5XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
+ struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
+ uint32_t rb_alpha_control = zsa->rb_alpha_control;
+
+ if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
+ rb_alpha_control &= ~A5XX_RB_ALPHA_CONTROL_ALPHA_TEST;
+
+ OUT_PKT4(ring, REG_A5XX_RB_ALPHA_CONTROL, 1);
+ OUT_RING(ring, rb_alpha_control);
+
+ OUT_PKT4(ring, REG_A5XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, zsa->rb_stencil_control);
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG)) {
+ struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
+ struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
+
+ if (pfb->zsbuf) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl;
+
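+         /* LRZ is only written during the binning pass, and only when both
+          * the blend and zsa state allow it:
+          */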
+ if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid)
+ gras_lrz_cntl = 0;
+ else if (emit->binning_pass && blend->lrz_write && zsa->lrz_write)
+ gras_lrz_cntl |= A5XX_GRAS_LRZ_CNTL_LRZ_WRITE;
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
+ OUT_RING(ring, gras_lrz_cntl);
+ }
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
+ struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
+ struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+ OUT_PKT4(ring, REG_A5XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, zsa->rb_stencilrefmask |
+ A5XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+ OUT_RING(ring, zsa->rb_stencilrefmask_bf |
+ A5XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+ struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
+ bool fragz = fp->no_earlyz || fp->has_kill || zsa->base.alpha_enabled ||
+ fp->writes_pos;
+
+ OUT_PKT4(ring, REG_A5XX_RB_DEPTH_CNTL, 1);
+ OUT_RING(ring, zsa->rb_depth_cntl);
+
+ OUT_PKT4(ring, REG_A5XX_RB_DEPTH_PLANE_CNTL, 1);
+ OUT_RING(ring, COND(fragz, A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) |
+ COND(fragz && fp->fragcoord_compmask != 0,
+ A5XX_RB_DEPTH_PLANE_CNTL_UNK1));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL, 1);
+ OUT_RING(ring, COND(fragz, A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) |
+ COND(fragz && fp->fragcoord_compmask != 0,
+ A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1));
+ }
+
+ /* NOTE: scissor enabled bit is part of rasterizer state: */
+ if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
+ struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
+ OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->minx) |
+ A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->miny));
+ OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->maxx - 1) |
+ A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->maxy - 1));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
+ OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) |
+ A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny));
+ OUT_RING(ring,
+ A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) |
+ A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1));
+
+ ctx->batch->max_scissor.minx =
+ MIN2(ctx->batch->max_scissor.minx, scissor->minx);
+ ctx->batch->max_scissor.miny =
+ MIN2(ctx->batch->max_scissor.miny, scissor->miny);
+ ctx->batch->max_scissor.maxx =
+ MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
+ ctx->batch->max_scissor.maxy =
+ MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
+ }
+
+ if (dirty & FD_DIRTY_VIEWPORT) {
+ fd_wfi(ctx->batch, ring);
+ OUT_PKT4(ring, REG_A5XX_GRAS_CL_VPORT_XOFFSET_0, 6);
+ OUT_RING(ring, A5XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
+ OUT_RING(ring, A5XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
+ OUT_RING(ring, A5XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
+ OUT_RING(ring, A5XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
+ OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
+ OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
+ }
+
+ if (dirty & FD_DIRTY_PROG)
+ fd5_program_emit(ctx, ring, emit);
+
+ if (dirty & FD_DIRTY_RASTERIZER) {
+ struct fd5_rasterizer_stateobj *rasterizer =
+ fd5_rasterizer_stateobj(ctx->rasterizer);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
+ OUT_RING(ring, rasterizer->gras_su_cntl |
+ COND(pfb->samples > 1, A5XX_GRAS_SU_CNTL_MSAA_ENABLE));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2);
+ OUT_RING(ring, rasterizer->gras_su_point_minmax);
+ OUT_RING(ring, rasterizer->gras_su_point_size);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp);
+
+ OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1);
+ OUT_RING(ring, rasterizer->pc_raster_cntl);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
+ OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
+ }
+
+ /* note: must come after program emit.. because there is some overlap
+    * in registers, ex. PC_PRIMITIVE_CNTL, and we rely on some cached
+ * values from fd5_program_emit() to avoid having to re-emit the prog
+ * every time rast state changes.
+ *
+ * Since the primitive restart state is not part of a tracked object, we
+ * re-emit this register every time.
+ */
+ if (emit->info && ctx->rasterizer) {
+ struct fd5_rasterizer_stateobj *rasterizer =
+ fd5_rasterizer_stateobj(ctx->rasterizer);
+ unsigned max_loc = fd5_context(ctx)->max_loc;
+
+ OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1);
+ OUT_RING(ring,
+ rasterizer->pc_primitive_cntl |
+ A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(max_loc) |
+ COND(emit->info->primitive_restart && emit->info->index_size,
+ A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART));
+ }
+
+ if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+ uint32_t posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
+ unsigned nr = pfb->nr_cbufs;
+
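+      /* no color buffers are written during the binning pass or when
+       * rasterizer discard is enabled:
+       */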
+ if (emit->binning_pass)
+ nr = 0;
+ else if (ctx->rasterizer->rasterizer_discard)
+ nr = 0;
+
+ OUT_PKT4(ring, REG_A5XX_RB_FS_OUTPUT_CNTL, 1);
+ OUT_RING(ring,
+ A5XX_RB_FS_OUTPUT_CNTL_MRT(nr) |
+ COND(fp->writes_pos, A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z));
+
+ OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_CNTL, 1);
+ OUT_RING(ring, A5XX_SP_FS_OUTPUT_CNTL_MRT(nr) |
+ A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(posz_regid) |
+ A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(regid(63, 0)));
+ }
+
+ ir3_emit_vs_consts(vp, ring, ctx, emit->info, emit->indirect, emit->draw);
+ if (!emit->binning_pass)
+ ir3_emit_fs_consts(fp, ring, ctx);
+
+ struct ir3_stream_output_info *info = &vp->shader->stream_output;
+ if (info->num_outputs) {
+ struct fd_streamout_stateobj *so = &ctx->streamout;
+
+ for (unsigned i = 0; i < so->num_targets; i++) {
+ struct fd_stream_output_target *target =
+ fd_stream_output_target(so->targets[i]);
+
+ if (!target)
+ continue;
+
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(i), 3);
+ /* VPC_SO[i].BUFFER_BASE_LO: */
+ OUT_RELOC(ring, fd_resource(target->base.buffer)->bo, 0, 0, 0);
+ OUT_RING(ring, target->base.buffer_size + target->base.buffer_offset);
+
+ struct fd_bo *offset_bo = fd_resource(target->offset_buf)->bo;
+
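+         /* on a streamout reset, seed offset_bo and the offset register
+          * from the CPU-known buffer_offset; otherwise restore the last
+          * HW-written offset from offset_bo via CP_MEM_TO_REG:
+          */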
+ if (so->reset & (1 << i)) {
+ assert(so->offsets[i] == 0);
+
+ OUT_PKT7(ring, CP_MEM_WRITE, 3);
+ OUT_RELOC(ring, offset_bo, 0, 0, 0);
+ OUT_RING(ring, target->base.buffer_offset);
+
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(i), 1);
+ OUT_RING(ring, target->base.buffer_offset);
+ } else {
+ OUT_PKT7(ring, CP_MEM_TO_REG, 3);
+ OUT_RING(ring,
+ CP_MEM_TO_REG_0_REG(REG_A5XX_VPC_SO_BUFFER_OFFSET(i)) |
+ CP_MEM_TO_REG_0_SHIFT_BY_2 | CP_MEM_TO_REG_0_UNK31 |
+ CP_MEM_TO_REG_0_CNT(0));
+ OUT_RELOC(ring, offset_bo, 0, 0, 0);
+ }
+
+         // After a draw, the HW writes the new offset back to offset_bo
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_FLUSH_BASE_LO(i), 2);
+ OUT_RELOC(ring, offset_bo, 0, 0, 0);
+
+ so->reset &= ~(1 << i);
+
+ emit->streamout_mask |= (1 << i);
+ }
+ }
+
+ if (dirty & FD_DIRTY_BLEND) {
+ struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
+ uint32_t i;
+
+ for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
+ bool is_int = util_format_is_pure_integer(format);
+ bool has_alpha = util_format_has_alpha(format);
+ uint32_t control = blend->rb_mrt[i].control;
+
+ if (is_int) {
+ control &= A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+ control |= A5XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
+ }
+
+ if (!has_alpha) {
+ control &= ~A5XX_RB_MRT_CONTROL_BLEND2;
+ }
+
+ OUT_PKT4(ring, REG_A5XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, control);
+
+ OUT_PKT4(ring, REG_A5XX_RB_MRT_BLEND_CONTROL(i), 1);
+ OUT_RING(ring, blend->rb_mrt[i].blend_control);
+ }
+
+ OUT_PKT4(ring, REG_A5XX_SP_BLEND_CNTL, 1);
+ OUT_RING(ring, blend->sp_blend_cntl);
+ }
+
+ if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK)) {
+ struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
+
+ OUT_PKT4(ring, REG_A5XX_RB_BLEND_CNTL, 1);
+ OUT_RING(ring, blend->rb_blend_cntl |
+ A5XX_RB_BLEND_CNTL_SAMPLE_MASK(ctx->sample_mask));
+ }
+
+ if (dirty & FD_DIRTY_BLEND_COLOR) {
+ struct pipe_blend_color *bcolor = &ctx->blend_color;
+
+ OUT_PKT4(ring, REG_A5XX_RB_BLEND_RED, 8);
+ OUT_RING(ring, A5XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) |
+ A5XX_RB_BLEND_RED_UINT(bcolor->color[0] * 0xff) |
+ A5XX_RB_BLEND_RED_SINT(bcolor->color[0] * 0x7f));
+ OUT_RING(ring, A5XX_RB_BLEND_RED_F32(bcolor->color[0]));
+ OUT_RING(ring, A5XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) |
+ A5XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 0xff) |
+ A5XX_RB_BLEND_GREEN_SINT(bcolor->color[1] * 0x7f));
+      OUT_RING(ring, A5XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
+ OUT_RING(ring, A5XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) |
+ A5XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 0xff) |
+ A5XX_RB_BLEND_BLUE_SINT(bcolor->color[2] * 0x7f));
+ OUT_RING(ring, A5XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
+ OUT_RING(ring, A5XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) |
+ A5XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 0xff) |
+ A5XX_RB_BLEND_ALPHA_SINT(bcolor->color[3] * 0x7f));
+ OUT_RING(ring, A5XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
+ }
+
+ if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) {
+ needs_border |=
+ emit_textures(ctx, ring, SB4_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX]);
+ OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1);
+ OUT_RING(ring, ctx->tex[PIPE_SHADER_VERTEX].num_textures);
+ }
+
+ if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) {
+ needs_border |=
+ emit_textures(ctx, ring, SB4_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]);
+ }
+
+ OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
+ OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask
+ ? ~0
+ : ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
+
+ OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1);
+ OUT_RING(ring, 0);
+
+ if (needs_border)
+ emit_border_color(ctx, ring);
+
+ if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
+ emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT],
+ fp);
+
+ if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
+ fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT, fp);
}
void
fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct ir3_shader_variant *cp)
+ struct ir3_shader_variant *cp)
{
- enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];
+ enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];
- if (dirty & FD_DIRTY_SHADER_TEX) {
- bool needs_border = false;
- needs_border |= emit_textures(ctx, ring, SB4_CS_TEX,
- &ctx->tex[PIPE_SHADER_COMPUTE]);
+ if (dirty & FD_DIRTY_SHADER_TEX) {
+ bool needs_border = false;
+ needs_border |=
+ emit_textures(ctx, ring, SB4_CS_TEX, &ctx->tex[PIPE_SHADER_COMPUTE]);
- if (needs_border)
- emit_border_color(ctx, ring);
+ if (needs_border)
+ emit_border_color(ctx, ring);
- OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
+ OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1);
+ OUT_RING(ring, 0);
- OUT_PKT4(ring, REG_A5XX_TPL1_HS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
+ OUT_PKT4(ring, REG_A5XX_TPL1_HS_TEX_COUNT, 1);
+ OUT_RING(ring, 0);
- OUT_PKT4(ring, REG_A5XX_TPL1_DS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
+ OUT_PKT4(ring, REG_A5XX_TPL1_DS_TEX_COUNT, 1);
+ OUT_RING(ring, 0);
- OUT_PKT4(ring, REG_A5XX_TPL1_GS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
+ OUT_PKT4(ring, REG_A5XX_TPL1_GS_TEX_COUNT, 1);
+ OUT_RING(ring, 0);
- OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
- }
+ OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
+ OUT_RING(ring, 0);
+ }
- OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1);
- OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask ?
- ~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
+ OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1);
+ OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask
+ ? ~0
+ : ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
- if (dirty & FD_DIRTY_SHADER_SSBO)
- emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE], cp);
+ if (dirty & FD_DIRTY_SHADER_SSBO)
+ emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE],
+ cp);
- if (dirty & FD_DIRTY_SHADER_IMAGE)
- fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE, cp);
+ if (dirty & FD_DIRTY_SHADER_IMAGE)
+ fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE, cp);
}
/* emit setup at begin of new cmdstream buffer (don't rely on previous
 * state, there could have been a context switch between ioctls):
 */
void
fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- struct fd_context *ctx = batch->ctx;
+ struct fd_context *ctx = batch->ctx;
- fd5_set_render_mode(ctx, ring, BYPASS);
- fd5_cache_flush(batch, ring);
+ fd5_set_render_mode(ctx, ring, BYPASS);
+ fd5_cache_flush(batch, ring);
- OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
- OUT_RING(ring, 0xfffff);
+ OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
+ OUT_RING(ring, 0xfffff);
-/*
-t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
-0000000500024048: 70d08003 00000000 001c5000 00000005
-t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
-0000000500024058: 70d08003 00000010 001c7000 00000005
+ /*
+ t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
+ 0000000500024048: 70d08003 00000000 001c5000 00000005
+ t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
+ 0000000500024058: 70d08003 00000010 001c7000 00000005
-t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
-0000000500024068: 70268000
-*/
+ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
+ 0000000500024068: 70268000
+ */
- OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1);
- OUT_RING(ring, 0xffffffff);
+ OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, 0xffffffff);
- OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1);
- OUT_RING(ring, 0x00000012);
+ OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1);
+ OUT_RING(ring, 0x00000012);
- OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2);
- OUT_RING(ring, A5XX_GRAS_SU_POINT_MINMAX_MIN(1.0) |
- A5XX_GRAS_SU_POINT_MINMAX_MAX(4092.0));
- OUT_RING(ring, A5XX_GRAS_SU_POINT_SIZE(0.5));
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2);
+ OUT_RING(ring, A5XX_GRAS_SU_POINT_MINMAX_MIN(1.0) |
+ A5XX_GRAS_SU_POINT_MINMAX_MAX(4092.0));
+ OUT_RING(ring, A5XX_GRAS_SU_POINT_SIZE(0.5));
- OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* GRAS_SC_SCREEN_SCISSOR_CNTL */
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* GRAS_SC_SCREEN_SCISSOR_CNTL */
- OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG_MAX_CONST, 1);
- OUT_RING(ring, 0); /* SP_VS_CONFIG_MAX_CONST */
+ OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG_MAX_CONST, 1);
+ OUT_RING(ring, 0); /* SP_VS_CONFIG_MAX_CONST */
- OUT_PKT4(ring, REG_A5XX_SP_FS_CONFIG_MAX_CONST, 1);
- OUT_RING(ring, 0); /* SP_FS_CONFIG_MAX_CONST */
+ OUT_PKT4(ring, REG_A5XX_SP_FS_CONFIG_MAX_CONST, 1);
+ OUT_RING(ring, 0); /* SP_FS_CONFIG_MAX_CONST */
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_E292, 2);
- OUT_RING(ring, 0x00000000); /* UNKNOWN_E292 */
- OUT_RING(ring, 0x00000000); /* UNKNOWN_E293 */
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_E292, 2);
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_E292 */
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_E293 */
- OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1);
- OUT_RING(ring, 0x00000044); /* RB_MODE_CNTL */
+ OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1);
+ OUT_RING(ring, 0x00000044); /* RB_MODE_CNTL */
- OUT_PKT4(ring, REG_A5XX_RB_DBG_ECO_CNTL, 1);
- OUT_RING(ring, 0x00100000); /* RB_DBG_ECO_CNTL */
+ OUT_PKT4(ring, REG_A5XX_RB_DBG_ECO_CNTL, 1);
+ OUT_RING(ring, 0x00100000); /* RB_DBG_ECO_CNTL */
- OUT_PKT4(ring, REG_A5XX_VFD_MODE_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* VFD_MODE_CNTL */
+ OUT_PKT4(ring, REG_A5XX_VFD_MODE_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* VFD_MODE_CNTL */
- OUT_PKT4(ring, REG_A5XX_PC_MODE_CNTL, 1);
- OUT_RING(ring, 0x0000001f); /* PC_MODE_CNTL */
+ OUT_PKT4(ring, REG_A5XX_PC_MODE_CNTL, 1);
+ OUT_RING(ring, 0x0000001f); /* PC_MODE_CNTL */
- OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
- OUT_RING(ring, 0x0000001e); /* SP_MODE_CNTL */
+ OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
+ OUT_RING(ring, 0x0000001e); /* SP_MODE_CNTL */
- if (ctx->screen->gpu_id == 540) {
- OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
- OUT_RING(ring, 0x800); /* SP_DBG_ECO_CNTL */
+ if (ctx->screen->gpu_id == 540) {
+ OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
+ OUT_RING(ring, 0x800); /* SP_DBG_ECO_CNTL */
- OUT_PKT4(ring, REG_A5XX_HLSQ_DBG_ECO_CNTL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT4(ring, REG_A5XX_HLSQ_DBG_ECO_CNTL, 1);
+ OUT_RING(ring, 0x0);
- OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1);
- OUT_RING(ring, 0x800400);
- } else {
- OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
- OUT_RING(ring, 0x40000800); /* SP_DBG_ECO_CNTL */
- }
+ OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1);
+ OUT_RING(ring, 0x800400);
+ } else {
+ OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
+ OUT_RING(ring, 0x40000800); /* SP_DBG_ECO_CNTL */
+ }
- OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
- OUT_RING(ring, 0x00000544); /* TPL1_MODE_CNTL */
+ OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
+ OUT_RING(ring, 0x00000544); /* TPL1_MODE_CNTL */
- OUT_PKT4(ring, REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0, 2);
- OUT_RING(ring, 0x00000080); /* HLSQ_TIMEOUT_THRESHOLD_0 */
- OUT_RING(ring, 0x00000000); /* HLSQ_TIMEOUT_THRESHOLD_1 */
+ OUT_PKT4(ring, REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0, 2);
+ OUT_RING(ring, 0x00000080); /* HLSQ_TIMEOUT_THRESHOLD_0 */
+ OUT_RING(ring, 0x00000000); /* HLSQ_TIMEOUT_THRESHOLD_1 */
- OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1);
- OUT_RING(ring, 0x00000400); /* VPC_DBG_ECO_CNTL */
+ OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1);
+ OUT_RING(ring, 0x00000400); /* VPC_DBG_ECO_CNTL */
- OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1);
- OUT_RING(ring, 0x00000001); /* HLSQ_MODE_CNTL */
+ OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1);
+ OUT_RING(ring, 0x00000001); /* HLSQ_MODE_CNTL */
- OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* VPC_MODE_CNTL */
+ OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* VPC_MODE_CNTL */
- /* we don't use this yet.. probably best to disable.. */
- OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
- OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
- CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
- CP_SET_DRAW_STATE__0_GROUP_ID(0));
- OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
- OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
+ /* we don't use this yet.. probably best to disable.. */
+ OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+ OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
+ CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+ CP_SET_DRAW_STATE__0_GROUP_ID(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
- OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* GRAS_SC_BIN_CNTL */
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* GRAS_SC_BIN_CNTL */
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* GRAS_SC_BIN_CNTL */
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* GRAS_SC_BIN_CNTL */
- OUT_PKT4(ring, REG_A5XX_VPC_FS_PRIMITIVEID_CNTL, 1);
- OUT_RING(ring, 0x000000ff); /* VPC_FS_PRIMITIVEID_CNTL */
+ OUT_PKT4(ring, REG_A5XX_VPC_FS_PRIMITIVEID_CNTL, 1);
+ OUT_RING(ring, 0x000000ff); /* VPC_FS_PRIMITIVEID_CNTL */
- OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
- OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
+ OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);
- OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(0), 3);
- OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */
- OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */
- OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(0), 3);
+ OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */
+ OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */
+ OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */
- OUT_PKT4(ring, REG_A5XX_VPC_SO_FLUSH_BASE_LO(0), 2);
- OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */
- OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_FLUSH_BASE_LO(0), 2);
+ OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */
+ OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */
- OUT_PKT4(ring, REG_A5XX_PC_GS_PARAM, 1);
- OUT_RING(ring, 0x00000000); /* PC_GS_PARAM */
+ OUT_PKT4(ring, REG_A5XX_PC_GS_PARAM, 1);
+ OUT_RING(ring, 0x00000000); /* PC_GS_PARAM */
- OUT_PKT4(ring, REG_A5XX_PC_HS_PARAM, 1);
- OUT_RING(ring, 0x00000000); /* PC_HS_PARAM */
+ OUT_PKT4(ring, REG_A5XX_PC_HS_PARAM, 1);
+ OUT_RING(ring, 0x00000000); /* PC_HS_PARAM */
- OUT_PKT4(ring, REG_A5XX_TPL1_TP_FS_ROTATION_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* TPL1_TP_FS_ROTATION_CNTL */
+ OUT_PKT4(ring, REG_A5XX_TPL1_TP_FS_ROTATION_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* TPL1_TP_FS_ROTATION_CNTL */
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_E004, 1);
- OUT_RING(ring, 0x00000000); /* UNKNOWN_E004 */
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_E004, 1);
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_E004 */
- OUT_PKT4(ring, REG_A5XX_GRAS_SU_LAYERED, 1);
- OUT_RING(ring, 0x00000000); /* GRAS_SU_LAYERED */
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_LAYERED, 1);
+ OUT_RING(ring, 0x00000000); /* GRAS_SU_LAYERED */
- OUT_PKT4(ring, REG_A5XX_VPC_SO_BUF_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* VPC_SO_BUF_CNTL */
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_BUF_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* VPC_SO_BUF_CNTL */
- OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(0), 1);
- OUT_RING(ring, 0x00000000); /* UNKNOWN_E2AB */
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(0), 1);
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_E2AB */
- OUT_PKT4(ring, REG_A5XX_PC_GS_LAYERED, 1);
- OUT_RING(ring, 0x00000000); /* PC_GS_LAYERED */
+ OUT_PKT4(ring, REG_A5XX_PC_GS_LAYERED, 1);
+ OUT_RING(ring, 0x00000000); /* PC_GS_LAYERED */
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5AB, 1);
- OUT_RING(ring, 0x00000000); /* UNKNOWN_E5AB */
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5AB, 1);
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_E5AB */
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5C2, 1);
- OUT_RING(ring, 0x00000000); /* UNKNOWN_E5C2 */
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5C2, 1);
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_E5C2 */
- OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(1), 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(1), 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(1), 6);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(2), 6);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(1), 6);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(2), 6);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(3), 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5DB, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(3), 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5DB, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT4(ring, REG_A5XX_SP_HS_CTRL_REG0, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT4(ring, REG_A5XX_SP_HS_CTRL_REG0, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT4(ring, REG_A5XX_SP_GS_CTRL_REG0, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT4(ring, REG_A5XX_SP_GS_CTRL_REG0, 1);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 4);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 2);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 2);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C0, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C5, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C0, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C5, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CA, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CF, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D4, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D9, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CA, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CF, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D4, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D9, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
}
static void
fd5_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
- unsigned dst_off, struct pipe_resource *src, unsigned src_off,
- unsigned sizedwords)
+ unsigned dst_off, struct pipe_resource *src, unsigned src_off,
+ unsigned sizedwords)
{
- struct fd_bo *src_bo = fd_resource(src)->bo;
- struct fd_bo *dst_bo = fd_resource(dst)->bo;
- unsigned i;
-
- for (i = 0; i < sizedwords; i++) {
- OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
- OUT_RING(ring, 0x00000000);
- OUT_RELOC(ring, dst_bo, dst_off, 0, 0);
- OUT_RELOC(ring, src_bo, src_off, 0, 0);
-
- dst_off += 4;
- src_off += 4;
- }
+ struct fd_bo *src_bo = fd_resource(src)->bo;
+ struct fd_bo *dst_bo = fd_resource(dst)->bo;
+ unsigned i;
+
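+   /* copy one dword per CP_MEM_TO_MEM packet, advancing both offsets by
+    * four bytes each iteration:
+    */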
+ for (i = 0; i < sizedwords; i++) {
+ OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
+ OUT_RING(ring, 0x00000000);
+ OUT_RELOC(ring, dst_bo, dst_off, 0, 0);
+ OUT_RELOC(ring, src_bo, src_off, 0, 0);
+
+ dst_off += 4;
+ src_off += 4;
+ }
}
void
fd5_emit_init_screen(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
- screen->emit_ib = fd5_emit_ib;
- screen->mem_to_mem = fd5_mem_to_mem;
+ struct fd_screen *screen = fd_screen(pscreen);
+ screen->emit_ib = fd5_emit_ib;
+ screen->mem_to_mem = fd5_mem_to_mem;
}
#include "pipe/p_context.h"
-#include "freedreno_batch.h"
-#include "freedreno_context.h"
#include "fd5_context.h"
#include "fd5_format.h"
#include "fd5_program.h"
#include "fd5_screen.h"
+#include "freedreno_batch.h"
+#include "freedreno_context.h"
#include "ir3_gallium.h"
struct fd_ringbuffer;
/* grouped together emit-state for prog/vertex/state emit: */
struct fd5_emit {
- struct pipe_debug_callback *debug;
- const struct fd_vertex_state *vtx;
- const struct fd5_program_state *prog;
- const struct pipe_draw_info *info;
- const struct pipe_draw_indirect_info *indirect;
- const struct pipe_draw_start_count *draw;
- bool binning_pass;
- struct ir3_cache_key key;
- enum fd_dirty_3d_state dirty;
-
- uint32_t sprite_coord_enable; /* bitmask */
- bool sprite_coord_mode;
- bool rasterflat;
-
- /* in binning pass, we don't have real frag shader, so we
- * don't know if real draw disqualifies lrz write. So just
- * figure that out up-front and stash it in the emit.
- */
- bool no_lrz_write;
-
- /* cached to avoid repeated lookups of same variants: */
- const struct ir3_shader_variant *vs, *fs;
- /* TODO: other shader stages.. */
-
- unsigned streamout_mask;
+ struct pipe_debug_callback *debug;
+ const struct fd_vertex_state *vtx;
+ const struct fd5_program_state *prog;
+ const struct pipe_draw_info *info;
+ const struct pipe_draw_indirect_info *indirect;
+ const struct pipe_draw_start_count *draw;
+ bool binning_pass;
+ struct ir3_cache_key key;
+ enum fd_dirty_3d_state dirty;
+
+ uint32_t sprite_coord_enable; /* bitmask */
+ bool sprite_coord_mode;
+ bool rasterflat;
+
+ /* in binning pass, we don't have real frag shader, so we
+ * don't know if real draw disqualifies lrz write. So just
+ * figure that out up-front and stash it in the emit.
+ */
+ bool no_lrz_write;
+
+ /* cached to avoid repeated lookups of same variants: */
+ const struct ir3_shader_variant *vs, *fs;
+ /* TODO: other shader stages.. */
+
+ unsigned streamout_mask;
};
-static inline enum a5xx_color_fmt fd5_emit_format(struct pipe_surface *surf)
+static inline enum a5xx_color_fmt
+fd5_emit_format(struct pipe_surface *surf)
{
- if (!surf)
- return 0;
- return fd5_pipe2color(surf->format);
+ if (!surf)
+ return 0;
+ return fd5_pipe2color(surf->format);
}
static inline const struct ir3_shader_variant *
fd5_emit_get_vp(struct fd5_emit *emit)
{
- if (!emit->vs) {
- /* We use nonbinning VS during binning when TFB is enabled because that
- * is what has all the outputs that might be involved in TFB.
- */
- if (emit->binning_pass && !emit->prog->vs->shader->stream_output.num_outputs)
- emit->vs = emit->prog->bs;
- else
- emit->vs = emit->prog->vs;
- }
- return emit->vs;
+ if (!emit->vs) {
+ /* We use nonbinning VS during binning when TFB is enabled because that
+ * is what has all the outputs that might be involved in TFB.
+ */
+ if (emit->binning_pass &&
+ !emit->prog->vs->shader->stream_output.num_outputs)
+ emit->vs = emit->prog->bs;
+ else
+ emit->vs = emit->prog->vs;
+ }
+ return emit->vs;
}
static inline const struct ir3_shader_variant *
fd5_emit_get_fp(struct fd5_emit *emit)
{
- if (!emit->fs) {
- if (emit->binning_pass) {
- /* use dummy stateobj to simplify binning vs non-binning: */
- static const struct ir3_shader_variant binning_fs = {};
- emit->fs = &binning_fs;
- } else {
- emit->fs = emit->prog->fs;
- }
- }
- return emit->fs;
+ if (!emit->fs) {
+ if (emit->binning_pass) {
+ /* use dummy stateobj to simplify binning vs non-binning: */
+ static const struct ir3_shader_variant binning_fs = {};
+ emit->fs = &binning_fs;
+ } else {
+ emit->fs = emit->prog->fs;
+ }
+ }
+ return emit->fs;
}
static inline void
-fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
- assert_dt
+fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
- fd_reset_wfi(batch);
- OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5);
- OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */
- OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */
- OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */
- OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */
- OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */
- fd_wfi(batch, ring);
+ fd_reset_wfi(batch);
+ OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5);
+ OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */
+ OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */
+ OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */
+ OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */
+ OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */
+ fd_wfi(batch, ring);
}
static inline void
fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum render_mode_cmd mode)
+ enum render_mode_cmd mode)
{
- /* TODO add preemption support, gmem bypass, etc */
- emit_marker5(ring, 7);
- OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
- OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode));
- OUT_RING(ring, 0x00000000); /* ADDR_LO */
- OUT_RING(ring, 0x00000000); /* ADDR_HI */
- OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) |
- COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE));
- OUT_RING(ring, 0x00000000);
- emit_marker5(ring, 7);
+ /* TODO add preemption support, gmem bypass, etc */
+ emit_marker5(ring, 7);
+ OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
+ OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode));
+ OUT_RING(ring, 0x00000000); /* ADDR_LO */
+ OUT_RING(ring, 0x00000000); /* ADDR_HI */
+ OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) |
+ COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE));
+ OUT_RING(ring, 0x00000000);
+ emit_marker5(ring, 7);
}
static inline void
fd5_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
- enum vgt_event_type evt, bool timestamp)
+ enum vgt_event_type evt, bool timestamp)
{
- OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
- OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
- if (timestamp) {
- OUT_RELOC(ring, fd5_context(batch->ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */
- OUT_RING(ring, 0x00000000);
- }
+ OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
+ OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
+ if (timestamp) {
+ OUT_RELOC(ring, fd5_context(batch->ctx)->blit_mem, 0, 0,
+ 0); /* ADDR_LO/HI */
+ OUT_RING(ring, 0x00000000);
+ }
}
static inline void
fd5_emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- emit_marker5(ring, 7);
- fd5_event_write(batch, ring, BLIT, true);
- emit_marker5(ring, 7);
+ emit_marker5(ring, 7);
+ fd5_event_write(batch, ring, BLIT, true);
+ emit_marker5(ring, 7);
}
static inline void
-fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning)
- assert_dt
+fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) assert_dt
{
- struct fd_ringbuffer *ring = binning ? ctx->batch->binning : ctx->batch->draw;
-
- /* TODO eventually this partially depends on the pfb state, ie.
- * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part
- * we could probably cache and just regenerate if framebuffer
- * state is dirty (or something like that)..
- *
- * Other bits seem to depend on query state, like if samples-passed
- * query is active.
- */
- bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0);
- OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
- OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */
- COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |
- COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) |
- COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) |
- COND(!blit, 0x8));
-
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
- OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */
- COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) |
- COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED));
+ struct fd_ringbuffer *ring =
+ binning ? ctx->batch->binning : ctx->batch->draw;
+
+ /* TODO eventually this partially depends on the pfb state, ie.
+ * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part
+ * we could probably cache and just regenerate if framebuffer
+ * state is dirty (or something like that)..
+ *
+ * Other bits seem to depend on query state, like if samples-passed
+ * query is active.
+ */
+ bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0);
+ OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
+ OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */
+ COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |
+ COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) |
+ COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) |
+ COND(!blit, 0x8));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
+ OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */
+ COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) |
+ COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED));
}
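/* For reference: COND() as used above is the freedreno helper from
 * freedreno_util.h, equivalent to:
 *
 *   #define COND(expr, val) ((expr) ? (val) : 0)
 *
 * so COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) contributes the bit
 * only on the binning pass and 0 otherwise, which is why these OR-chains
 * read as conditional bitfields.
 */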
static inline void
fd5_emit_lrz_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- /* TODO I think the extra writes to GRAS_LRZ_CNTL are probably
- * a workaround and not needed on all a5xx.
- */
- OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
- OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE);
+ /* TODO I think the extra writes to GRAS_LRZ_CNTL are probably
+ * a workaround and not needed on all a5xx.
+ */
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
+ OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE);
- fd5_event_write(batch, ring, LRZ_FLUSH, false);
+ fd5_event_write(batch, ring, LRZ_FLUSH, false);
- OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
+ OUT_RING(ring, 0x0);
}
-void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit) assert_dt;
+void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring,
+ struct fd5_emit *emit) assert_dt;
void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd5_emit *emit) assert_dt;
+ struct fd5_emit *emit) assert_dt;
void fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct ir3_shader_variant *cp) assert_dt;
-void fd5_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
- struct fd_context *ctx, const struct pipe_grid_info *info) assert_dt;
+ struct ir3_shader_variant *cp) assert_dt;
+void fd5_emit_cs_consts(const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring, struct fd_context *ctx,
+ const struct pipe_grid_info *info) assert_dt;
-void fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;
+void fd5_emit_restore(struct fd_batch *batch,
+ struct fd_ringbuffer *ring) assert_dt;
void fd5_emit_init_screen(struct pipe_screen *pscreen);
void fd5_emit_init(struct pipe_context *pctx);
static inline void
fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
- /* for debug after a lock up, write a unique counter value
- * to scratch6 for each IB, to make it easier to match up
- * register dumps to cmdstream. The combination of IB and
- * DRAW (scratch7) is enough to "triangulate" the particular
- * draw that caused lockup.
- */
- emit_marker5(ring, 6);
- __OUT_IB5(ring, target);
- emit_marker5(ring, 6);
+ /* for debug after a lock up, write a unique counter value
+ * to scratch6 for each IB, to make it easier to match up
+ * register dumps to cmdstream. The combination of IB and
+ * DRAW (scratch7) is enough to "triangulate" the particular
+ * draw that caused lockup.
+ */
+ emit_marker5(ring, 6);
+ __OUT_IB5(ring, target);
+ emit_marker5(ring, 6);
}
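/* Loose sketch of the marker mechanism (based on emit_marker5() in this
 * file; details such as the WFI and the debug-enable check are elided):
 *
 *   static inline void
 *   example_marker(struct fd_ringbuffer *ring, int scratch_idx)
 *   {
 *      static unsigned marker_cnt;
 *      OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(scratch_idx), 1);
 *      OUT_RING(ring, ++marker_cnt); // unique value per marker
 *   }
 *
 * A post-lockup register dump of SCRATCH_REG(6)/SCRATCH_REG(7) can then
 * be matched against the cmdstream as the comment above describes.
 */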
#endif /* FD5_EMIT_H */
#include "fd5_format.h"
-
/* Specifies the table of all the formats and their features. Also supplies
* the helpers that look up various data in those tables.
*/
struct fd5_format {
- enum a5xx_vtx_fmt vtx;
- enum a5xx_tex_fmt tex;
- enum a5xx_color_fmt rb;
- enum a3xx_color_swap swap;
- boolean present;
+ enum a5xx_vtx_fmt vtx;
+ enum a5xx_tex_fmt tex;
+ enum a5xx_color_fmt rb;
+ enum a3xx_color_swap swap;
+ boolean present;
};
/* vertex + texture */
-#define VT(pipe, fmt, rbfmt, swapfmt) \
- [PIPE_FORMAT_ ## pipe] = { \
- .present = 1, \
- .vtx = VFMT5_ ## fmt, \
- .tex = TFMT5_ ## fmt, \
- .rb = RB5_ ## rbfmt, \
- .swap = swapfmt \
- }
+#define VT(pipe, fmt, rbfmt, swapfmt) \
+ [PIPE_FORMAT_##pipe] = {.present = 1, \
+ .vtx = VFMT5_##fmt, \
+ .tex = TFMT5_##fmt, \
+ .rb = RB5_##rbfmt, \
+ .swap = swapfmt}
/* texture-only */
-#define _T(pipe, fmt, rbfmt, swapfmt) \
- [PIPE_FORMAT_ ## pipe] = { \
- .present = 1, \
- .vtx = VFMT5_NONE, \
- .tex = TFMT5_ ## fmt, \
- .rb = RB5_ ## rbfmt, \
- .swap = swapfmt \
- }
+#define _T(pipe, fmt, rbfmt, swapfmt) \
+ [PIPE_FORMAT_##pipe] = {.present = 1, \
+ .vtx = VFMT5_NONE, \
+ .tex = TFMT5_##fmt, \
+ .rb = RB5_##rbfmt, \
+ .swap = swapfmt}
/* vertex-only */
-#define V_(pipe, fmt, rbfmt, swapfmt) \
- [PIPE_FORMAT_ ## pipe] = { \
- .present = 1, \
- .vtx = VFMT5_ ## fmt, \
- .tex = TFMT5_NONE, \
- .rb = RB5_ ## rbfmt, \
- .swap = swapfmt \
- }
+#define V_(pipe, fmt, rbfmt, swapfmt) \
+ [PIPE_FORMAT_##pipe] = {.present = 1, \
+ .vtx = VFMT5_##fmt, \
+ .tex = TFMT5_NONE, \
+ .rb = RB5_##rbfmt, \
+ .swap = swapfmt}
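/* Expansion sketch: a hypothetical row such as
 * VT(R32_FLOAT, 32_FLOAT, R32_FLOAT, WZYX) (the real table body is
 * elided from this hunk) expands via token pasting to:
 *
 *   [PIPE_FORMAT_R32_FLOAT] = {.present = 1,
 *                              .vtx = VFMT5_32_FLOAT,
 *                              .tex = TFMT5_32_FLOAT,
 *                              .rb = RB5_R32_FLOAT,
 *                              .swap = WZYX},
 *
 * Formats without a row stay zero-initialized, so .present == 0 is the
 * "unsupported" sentinel the lookup helpers below test for.
 */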
/* clang-format off */
static struct fd5_format formats[PIPE_FORMAT_COUNT] = {
enum a5xx_vtx_fmt
fd5_pipe2vtx(enum pipe_format format)
{
- if (!formats[format].present)
- return VFMT5_NONE;
- return formats[format].vtx;
+ if (!formats[format].present)
+ return VFMT5_NONE;
+ return formats[format].vtx;
}
/* convert pipe format to texture sampler format: */
enum a5xx_tex_fmt
fd5_pipe2tex(enum pipe_format format)
{
- if (!formats[format].present)
- return TFMT5_NONE;
- return formats[format].tex;
+ if (!formats[format].present)
+ return TFMT5_NONE;
+ return formats[format].tex;
}
/* convert pipe format to MRT / copydest format used for render-target: */
enum a5xx_color_fmt
fd5_pipe2color(enum pipe_format format)
{
- if (!formats[format].present)
- return RB5_NONE;
- return formats[format].rb;
+ if (!formats[format].present)
+ return RB5_NONE;
+ return formats[format].rb;
}
enum a3xx_color_swap
fd5_pipe2swap(enum pipe_format format)
{
- if (!formats[format].present)
- return WZYX;
- return formats[format].swap;
+ if (!formats[format].present)
+ return WZYX;
+ return formats[format].swap;
}
enum a5xx_depth_format
fd5_pipe2depth(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_Z16_UNORM:
- return DEPTH5_16;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- case PIPE_FORMAT_X8Z24_UNORM:
- case PIPE_FORMAT_S8_UINT_Z24_UNORM:
- return DEPTH5_24_8;
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- return DEPTH5_32;
- default:
- return ~0;
- }
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ return DEPTH5_16;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ return DEPTH5_24_8;
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return DEPTH5_32;
+ default:
+ return ~0;
+ }
}
static inline enum a5xx_tex_swiz
tex_swiz(unsigned swiz)
{
- switch (swiz) {
- default:
- case PIPE_SWIZZLE_X: return A5XX_TEX_X;
- case PIPE_SWIZZLE_Y: return A5XX_TEX_Y;
- case PIPE_SWIZZLE_Z: return A5XX_TEX_Z;
- case PIPE_SWIZZLE_W: return A5XX_TEX_W;
- case PIPE_SWIZZLE_0: return A5XX_TEX_ZERO;
- case PIPE_SWIZZLE_1: return A5XX_TEX_ONE;
- }
+ switch (swiz) {
+ default:
+ case PIPE_SWIZZLE_X:
+ return A5XX_TEX_X;
+ case PIPE_SWIZZLE_Y:
+ return A5XX_TEX_Y;
+ case PIPE_SWIZZLE_Z:
+ return A5XX_TEX_Z;
+ case PIPE_SWIZZLE_W:
+ return A5XX_TEX_W;
+ case PIPE_SWIZZLE_0:
+ return A5XX_TEX_ZERO;
+ case PIPE_SWIZZLE_1:
+ return A5XX_TEX_ONE;
+ }
}
uint32_t
fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
- unsigned swizzle_b, unsigned swizzle_a)
+ unsigned swizzle_b, unsigned swizzle_a)
{
- const struct util_format_description *desc =
- util_format_description(format);
- unsigned char swiz[4] = {
- swizzle_r, swizzle_g, swizzle_b, swizzle_a,
- }, rswiz[4];
-
- util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
-
- return A5XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
- A5XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
- A5XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
- A5XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
+ const struct util_format_description *desc = util_format_description(format);
+ unsigned char swiz[4] =
+ {
+ swizzle_r,
+ swizzle_g,
+ swizzle_b,
+ swizzle_a,
+ },
+ rswiz[4];
+
+ util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
+
+ return A5XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
+ A5XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
+ A5XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
+ A5XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
}
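/* Composition sketch (assuming util_format_compose_swizzles() behaves as
 * documented in u_format.h: rswiz[i] = desc->swizzle[swiz[i]] when
 * swiz[i] names a channel, with 0/1 passed through).  E.g. for an
 * identity desc->swizzle and a BGRA-style view swizzle {Z, Y, X, W},
 * rswiz comes out {Z, Y, X, W}, which the A5XX_TEX_CONST_0_SWIZ_* fields
 * then encode via tex_swiz() above.
 */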
enum a5xx_depth_format fd5_pipe2depth(enum pipe_format format);
uint32_t fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r,
- unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+ unsigned swizzle_g, unsigned swizzle_b,
+ unsigned swizzle_a);
#endif /* FD5_UTIL_H_ */
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "freedreno_draw.h"
-#include "freedreno_state.h"
#include "freedreno_resource.h"
+#include "freedreno_state.h"
-#include "fd5_gmem.h"
#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
-#include "fd5_program.h"
#include "fd5_format.h"
+#include "fd5_gmem.h"
+#include "fd5_program.h"
#include "fd5_zsa.h"
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
- struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
+ struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
{
- enum a5xx_tile_mode tile_mode;
- unsigned i;
-
- for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
- enum a5xx_color_fmt format = 0;
- enum a3xx_color_swap swap = WZYX;
- bool srgb = false, sint = false, uint = false;
- struct fd_resource *rsc = NULL;
- struct fdl_slice *slice = NULL;
- uint32_t stride = 0;
- uint32_t size = 0;
- uint32_t base = 0;
- uint32_t offset = 0;
-
- if (gmem) {
- tile_mode = TILE5_2;
- } else {
- tile_mode = TILE5_LINEAR;
- }
-
- if ((i < nr_bufs) && bufs[i]) {
- struct pipe_surface *psurf = bufs[i];
- enum pipe_format pformat = psurf->format;
-
- rsc = fd_resource(psurf->texture);
-
- slice = fd_resource_slice(rsc, psurf->u.tex.level);
- format = fd5_pipe2color(pformat);
- swap = fd5_pipe2swap(pformat);
- srgb = util_format_is_srgb(pformat);
- sint = util_format_is_pure_sint(pformat);
- uint = util_format_is_pure_uint(pformat);
-
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
-
- offset = fd_resource_offset(rsc, psurf->u.tex.level,
- psurf->u.tex.first_layer);
-
- if (gmem) {
- stride = gmem->bin_w * gmem->cbuf_cpp[i];
- size = stride * gmem->bin_h;
- base = gmem->cbuf_base[i];
- } else {
- stride = fd_resource_pitch(rsc, psurf->u.tex.level);
- size = slice->size0;
-
- tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
- }
- }
-
- OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
- OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
- A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
- A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
- COND(gmem, 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
- COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
- OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
- OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
- if (gmem || (i >= nr_bufs) || !bufs[i]) {
- OUT_RING(ring, base); /* RB_MRT[i].BASE_LO */
- OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
- } else {
- debug_assert((offset + size) <= fd_bo_size(rsc->bo));
- OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
- }
-
- OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
- OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
- COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
- COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
- COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));
-
- /* when we support UBWC, these would be the system memory
- * addr/pitch/etc:
- */
- OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
- OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
- OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
- OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
- OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
- }
+ enum a5xx_tile_mode tile_mode;
+ unsigned i;
+
+ for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
+ enum a5xx_color_fmt format = 0;
+ enum a3xx_color_swap swap = WZYX;
+ bool srgb = false, sint = false, uint = false;
+ struct fd_resource *rsc = NULL;
+ struct fdl_slice *slice = NULL;
+ uint32_t stride = 0;
+ uint32_t size = 0;
+ uint32_t base = 0;
+ uint32_t offset = 0;
+
+ if (gmem) {
+ tile_mode = TILE5_2;
+ } else {
+ tile_mode = TILE5_LINEAR;
+ }
+
+ if ((i < nr_bufs) && bufs[i]) {
+ struct pipe_surface *psurf = bufs[i];
+ enum pipe_format pformat = psurf->format;
+
+ rsc = fd_resource(psurf->texture);
+
+ slice = fd_resource_slice(rsc, psurf->u.tex.level);
+ format = fd5_pipe2color(pformat);
+ swap = fd5_pipe2swap(pformat);
+ srgb = util_format_is_srgb(pformat);
+ sint = util_format_is_pure_sint(pformat);
+ uint = util_format_is_pure_uint(pformat);
+
+ debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+ offset = fd_resource_offset(rsc, psurf->u.tex.level,
+ psurf->u.tex.first_layer);
+
+ if (gmem) {
+ stride = gmem->bin_w * gmem->cbuf_cpp[i];
+ size = stride * gmem->bin_h;
+ base = gmem->cbuf_base[i];
+ } else {
+ stride = fd_resource_pitch(rsc, psurf->u.tex.level);
+ size = slice->size0;
+
+ tile_mode =
+ fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
+ }
+ }
+
+ OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
+ OUT_RING(
+ ring,
+ A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
+ A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
+ A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
+ COND(gmem,
+ 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
+ COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
+ OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
+ OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
+ if (gmem || (i >= nr_bufs) || !bufs[i]) {
+ OUT_RING(ring, base); /* RB_MRT[i].BASE_LO */
+ OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
+ } else {
+ debug_assert((offset + size) <= fd_bo_size(rsc->bo));
+ OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
+ }
+
+ OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
+ OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
+ COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
+ COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
+ COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));
+
+ /* when we support UBWC, these would be the system memory
+ * addr/pitch/etc:
+ */
+ OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
+ OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
+ OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
+ OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
+ OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
+ }
}
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
- const struct fd_gmem_stateobj *gmem)
+ const struct fd_gmem_stateobj *gmem)
{
- if (zsbuf) {
- struct fd_resource *rsc = fd_resource(zsbuf->texture);
- enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
- uint32_t cpp = rsc->layout.cpp;
- uint32_t stride = 0;
- uint32_t size = 0;
-
- if (gmem) {
- stride = cpp * gmem->bin_w;
- size = stride * gmem->bin_h;
- } else {
- stride = fd_resource_pitch(rsc, 0);
- size = fd_resource_slice(rsc, 0)->size0;
- }
-
- OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
- OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
- if (gmem) {
- OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
- } else {
- OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
- }
- OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
- OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));
-
- OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
- OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
-
- OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */
-
- if (rsc->lrz) {
- OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
- OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
- OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));
-
- OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
- OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
- } else {
- OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
-
- OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
-
- if (rsc->stencil) {
- if (gmem) {
- stride = 1 * gmem->bin_w;
- size = stride * gmem->bin_h;
- } else {
- stride = fd_resource_pitch(rsc->stencil, 0);
- size = fd_resource_slice(rsc->stencil, 0)->size0;
- }
-
- OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
- OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
- if (gmem) {
- OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
- OUT_RING(ring, 0x00000000); /* RB_STENCIL_BASE_HI */
- } else {
- OUT_RELOC(ring, rsc->stencil->bo, 0, 0, 0); /* RB_STENCIL_BASE_LO/HI */
- }
- OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
- OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
- } else {
- OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
- OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
- }
- } else {
- OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
- OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
-
- OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
- OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
-
- OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */
-
- OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
- OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
- }
+ if (zsbuf) {
+ struct fd_resource *rsc = fd_resource(zsbuf->texture);
+ enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
+ uint32_t cpp = rsc->layout.cpp;
+ uint32_t stride = 0;
+ uint32_t size = 0;
+
+ if (gmem) {
+ stride = cpp * gmem->bin_w;
+ size = stride * gmem->bin_h;
+ } else {
+ stride = fd_resource_pitch(rsc, 0);
+ size = fd_resource_slice(rsc, 0)->size0;
+ }
+
+ OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
+ OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
+ if (gmem) {
+ OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
+ } else {
+ OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
+ }
+ OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
+ OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
+ OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
+
+ OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */
+
+ if (rsc->lrz) {
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
+ OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
+ OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
+ OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
+ } else {
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+
+ if (rsc->stencil) {
+ if (gmem) {
+ stride = 1 * gmem->bin_w;
+ size = stride * gmem->bin_h;
+ } else {
+ stride = fd_resource_pitch(rsc->stencil, 0);
+ size = fd_resource_slice(rsc->stencil, 0)->size0;
+ }
+
+ OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
+ OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
+ if (gmem) {
+ OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
+ OUT_RING(ring, 0x00000000); /* RB_STENCIL_BASE_HI */
+ } else {
+ OUT_RELOC(ring, rsc->stencil->bo, 0, 0,
+ 0); /* RB_STENCIL_BASE_LO/HI */
+ }
+ OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
+ OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
+ } else {
+ OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
+ OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
+ }
+ } else {
+ OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
+ OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
+ OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
+
+ OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */
+
+ OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
+ OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
+ }
}
static bool
use_hw_binning(struct fd_batch *batch)
{
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- if ((gmem->maxpw * gmem->maxph) > 32)
- return false;
+ if ((gmem->maxpw * gmem->maxph) > 32)
+ return false;
- if ((gmem->maxpw > 15) || (gmem->maxph > 15))
- return false;
+ if ((gmem->maxpw > 15) || (gmem->maxph > 15))
+ return false;
- return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
- (batch->num_draws > 0);
+ return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
+ (batch->num_draws > 0);
}
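/* Worked example (illustrative numbers, not from this patch): a gmem
 * config with nbins_x = 4, nbins_y = 3 gives 12 bins (> 2), and if every
 * VSC pipe covers at most e.g. 4x3 tiles (4 * 3 = 12 <= 32, each
 * dimension <= 15), use_hw_binning() returns true once the batch has a
 * draw.  Otherwise the driver falls back to replaying all draws in every
 * tile via patch_draws(batch, IGNORE_VISIBILITY) below.
 */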
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
- unsigned i;
- for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
- struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
- *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
- }
- util_dynarray_clear(&batch->draw_patches);
+ unsigned i;
+ for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
+ *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
+ }
+ util_dynarray_clear(&batch->draw_patches);
}
static void
-update_vsc_pipe(struct fd_batch *batch)
- assert_dt
+update_vsc_pipe(struct fd_batch *batch) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct fd5_context *fd5_ctx = fd5_context(ctx);
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd_ringbuffer *ring = batch->gmem;
- int i;
-
- OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
- OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
- A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
- OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */
-
- OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
- OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
- OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */
-
- OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
- for (i = 0; i < 16; i++) {
- const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
- OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
- A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
- A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
- A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
- }
-
- OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
- for (i = 0; i < 16; i++) {
- if (!ctx->vsc_pipe_bo[i]) {
- ctx->vsc_pipe_bo[i] = fd_bo_new(ctx->dev, 0x20000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
- }
- OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
- }
-
- OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
- for (i = 0; i < 16; i++) {
- OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
- }
+ struct fd_context *ctx = batch->ctx;
+ struct fd5_context *fd5_ctx = fd5_context(ctx);
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_ringbuffer *ring = batch->gmem;
+ int i;
+
+ OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
+ OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
+ A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
+ OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */
+
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */
+
+ OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
+ for (i = 0; i < 16; i++) {
+ const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
+ OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
+ A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
+ A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
+ A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
+ }
+
+ OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
+ for (i = 0; i < 16; i++) {
+ if (!ctx->vsc_pipe_bo[i]) {
+ ctx->vsc_pipe_bo[i] = fd_bo_new(
+ ctx->dev, 0x20000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
+ }
+ OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
+ 0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
+ }
+
+ OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
+ for (i = 0; i < 16; i++) {
+ OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
+ 32); /* VSC_PIPE_DATA_LENGTH[i] */
+ }
}
static void
-emit_binning_pass(struct fd_batch *batch)
- assert_dt
+emit_binning_pass(struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->gmem;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_ringbuffer *ring = batch->gmem;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- uint32_t x1 = gmem->minx;
- uint32_t y1 = gmem->miny;
- uint32_t x2 = gmem->minx + gmem->width - 1;
- uint32_t y2 = gmem->miny + gmem->height - 1;
+ uint32_t x1 = gmem->minx;
+ uint32_t y1 = gmem->miny;
+ uint32_t x2 = gmem->minx + gmem->width - 1;
+ uint32_t y2 = gmem->miny + gmem->height - 1;
- fd5_set_render_mode(batch->ctx, ring, BINNING);
+ fd5_set_render_mode(batch->ctx, ring, BINNING);
- OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
- OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
- A5XX_RB_CNTL_HEIGHT(gmem->bin_h));
+ OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+ OUT_RING(ring,
+ A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
- OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
- A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
- OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
- A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
+ A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
+ OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
+ A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));
- OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
- OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) |
- A5XX_RB_RESOLVE_CNTL_1_Y(y1));
- OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) |
- A5XX_RB_RESOLVE_CNTL_2_Y(y2));
+ OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));
- update_vsc_pipe(batch);
+ update_vsc_pipe(batch);
- OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
- OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);
+ OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
+ OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);
- fd5_event_write(batch, ring, UNK_2C, false);
+ fd5_event_write(batch, ring, UNK_2C, false);
- OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
- OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) |
- A5XX_RB_WINDOW_OFFSET_Y(0));
+ OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));
- /* emit IB to binning drawcmds: */
- fd5_emit_ib(ring, batch->binning);
+ /* emit IB to binning drawcmds: */
+ fd5_emit_ib(ring, batch->binning);
- fd_reset_wfi(batch);
+ fd_reset_wfi(batch);
- fd5_event_write(batch, ring, UNK_2D, false);
+ fd5_event_write(batch, ring, UNK_2D, false);
- fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);
+ fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);
- // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)
+ // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)
- fd_wfi(batch, ring);
+ fd_wfi(batch, ring);
- OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
+ OUT_RING(ring, 0x0);
}
/* before first tile */
static void
-fd5_emit_tile_init(struct fd_batch *batch)
- assert_dt
+fd5_emit_tile_init(struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- fd5_emit_restore(batch, ring);
+ fd5_emit_restore(batch, ring);
- if (batch->prologue)
- fd5_emit_ib(ring, batch->prologue);
+ if (batch->prologue)
+ fd5_emit_ib(ring, batch->prologue);
- fd5_emit_lrz_flush(batch, ring);
+ fd5_emit_lrz_flush(batch, ring);
- OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
- OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */
+ OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
+ OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x0);
- OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
- OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */
+ OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
+ OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */
- OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
- OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */
+ OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
+ OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */
- /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
- fd_wfi(batch, ring);
- OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
- OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */
+ /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
+ fd_wfi(batch, ring);
+ OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
+ OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */
- emit_zs(ring, pfb->zsbuf, batch->gmem_state);
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);
+ emit_zs(ring, pfb->zsbuf, batch->gmem_state);
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);
- /* Enable stream output for the first pass (likely the binning). */
- OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
- OUT_RING(ring, 0);
+ /* Enable stream output for the first pass (likely the binning). */
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
+ OUT_RING(ring, 0);
- if (use_hw_binning(batch)) {
- emit_binning_pass(batch);
+ if (use_hw_binning(batch)) {
+ emit_binning_pass(batch);
- /* Disable stream output after binning, since each VS output should get
- * streamed out once.
- */
- OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
- OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);
+ /* Disable stream output after binning, since each VS output should get
+ * streamed out once.
+ */
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
+ OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);
- fd5_emit_lrz_flush(batch, ring);
- patch_draws(batch, USE_VISIBILITY);
- } else {
- patch_draws(batch, IGNORE_VISIBILITY);
- }
+ fd5_emit_lrz_flush(batch, ring);
+ patch_draws(batch, USE_VISIBILITY);
+ } else {
+ patch_draws(batch, IGNORE_VISIBILITY);
+ }
- fd5_set_render_mode(batch->ctx, ring, GMEM);
+ fd5_set_render_mode(batch->ctx, ring, GMEM);
- /* XXX If we're in gmem mode but not doing HW binning, then after the first
- * tile we should disable stream output (fd6_gmem.c doesn't do that either).
- */
+ /* XXX If we're in gmem mode but not doing HW binning, then after the first
+ * tile we should disable stream output (fd6_gmem.c doesn't do that either).
+ */
}
/* before mem2gmem */
static void
-fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
- assert_dt
+fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd5_context *fd5_ctx = fd5_context(ctx);
- struct fd_ringbuffer *ring = batch->gmem;
-
- uint32_t x1 = tile->xoff;
- uint32_t y1 = tile->yoff;
- uint32_t x2 = tile->xoff + tile->bin_w - 1;
- uint32_t y2 = tile->yoff + tile->bin_h - 1;
-
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
- OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
- A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
- OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
- A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));
-
- OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
- OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) |
- A5XX_RB_RESOLVE_CNTL_1_Y(y1));
- OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) |
- A5XX_RB_RESOLVE_CNTL_2_Y(y2));
-
- if (use_hw_binning(batch)) {
- const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
- struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
-
- OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
-
- OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
- OUT_RING(ring, 0x0);
-
- OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
- OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
- CP_SET_BIN_DATA5_0_VSC_N(tile->n));
- OUT_RELOC(ring, pipe_bo, 0, 0, 0); /* VSC_PIPE[p].DATA_ADDRESS */
- OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */
- (tile->p * 4), 0, 0);
- } else {
- OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
- OUT_RING(ring, 0x1);
- }
-
- OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
- OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) |
- A5XX_RB_WINDOW_OFFSET_Y(y1));
+ struct fd_context *ctx = batch->ctx;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd5_context *fd5_ctx = fd5_context(ctx);
+ struct fd_ringbuffer *ring = batch->gmem;
+
+ uint32_t x1 = tile->xoff;
+ uint32_t y1 = tile->yoff;
+ uint32_t x2 = tile->xoff + tile->bin_w - 1;
+ uint32_t y2 = tile->yoff + tile->bin_h - 1;
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
+ A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
+ OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
+ A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));
+
+ OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));
+
+ if (use_hw_binning(batch)) {
+ const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
+ struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
+
+ OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x0);
+
+ OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
+ OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
+ CP_SET_BIN_DATA5_0_VSC_N(tile->n));
+ OUT_RELOC(ring, pipe_bo, 0, 0, 0); /* VSC_PIPE[p].DATA_ADDRESS */
+ OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */
+ (tile->p * 4), 0, 0);
+ } else {
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x1);
+ }
+
+ OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));
}
-
/*
* transfer from system memory to gmem
*/
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
- struct pipe_surface *psurf, enum a5xx_blit_buf buf)
+ struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
- struct fd_ringbuffer *ring = batch->gmem;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd_resource *rsc = fd_resource(psurf->texture);
- uint32_t stride, size;
-
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
-
- if (buf == BLIT_S)
- rsc = rsc->stencil;
-
- if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
- // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
- // know otherwise how to go from linear in sysmem to tiled in gmem.
- // possibly we want to flip this around gmem2mem and keep depth
- // tiled in sysmem (and fixup sampler state to assume tiled).. this
- // might be required for doing depth/stencil in bypass mode?
- struct fdl_slice *slice = fd_resource_slice(rsc, 0);
- enum a5xx_color_fmt format =
- fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));
-
- OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
- OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
- A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
- A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
- OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, 0)));
- OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0));
- OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* BASE_LO/HI */
-
- buf = BLIT_MRT0;
- }
-
- stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
- size = stride * gmem->bin_h;
-
- OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
- OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
- OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
- OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
- OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */
-
- OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
- OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
- OUT_RING(ring, base); /* RB_BLIT_DST_LO */
- OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
- OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
- OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));
-
- OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
- OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));
-
- fd5_emit_blit(batch, ring);
+ struct fd_ringbuffer *ring = batch->gmem;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+ uint32_t stride, size;
+
+ debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+ if (buf == BLIT_S)
+ rsc = rsc->stencil;
+
+ if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
+ // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
+ // know otherwise how to go from linear in sysmem to tiled in gmem.
+ // possibly we want to flip this around gmem2mem and keep depth
+ // tiled in sysmem (and fixup sampler state to assume tiled).. this
+ // might be required for doing depth/stencil in bypass mode?
+ struct fdl_slice *slice = fd_resource_slice(rsc, 0);
+ enum a5xx_color_fmt format =
+ fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));
+
+ OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
+ OUT_RING(ring,
+ A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
+ A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
+ A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
+ OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, 0)));
+ OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0));
+ OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* BASE_LO/HI */
+
+ buf = BLIT_MRT0;
+ }
+
+ stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
+ size = stride * gmem->bin_h;
+
+ OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
+ OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
+ OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
+ OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
+ OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */
+
+ OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
+ OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
+ OUT_RING(ring, base); /* RB_BLIT_DST_LO */
+ OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
+ OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
+ OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));
+
+ OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));
+
+ fd5_emit_blit(batch, ring);
}
static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
- struct fd_ringbuffer *ring = batch->gmem;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
-
- /*
- * setup mrt and zs with system memory base addresses:
- */
-
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
-// emit_zs(ring, pfb->zsbuf, NULL);
-
- OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
- OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
- A5XX_RB_CNTL_HEIGHT(gmem->bin_h) |
- A5XX_RB_CNTL_BYPASS);
-
- if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
- unsigned i;
- for (i = 0; i < pfb->nr_cbufs; i++) {
- if (!pfb->cbufs[i])
- continue;
- if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
- continue;
- emit_mem2gmem_surf(batch, gmem->cbuf_base[i],
- pfb->cbufs[i], BLIT_MRT0 + i);
- }
- }
-
- if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
-
- if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
- emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
- if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
- emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
- }
+ struct fd_ringbuffer *ring = batch->gmem;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
+ /*
+ * setup mrt and zs with system memory base addresses:
+ */
+
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
+ // emit_zs(ring, pfb->zsbuf, NULL);
+
+ OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
+ A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);
+
+ if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
+ unsigned i;
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ if (!pfb->cbufs[i])
+ continue;
+ if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+ emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
+ BLIT_MRT0 + i);
+ }
+ }
+
+ if (fd_gmem_needs_restore(batch, tile,
+ FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+
+ if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
+ emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
+ if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
+ emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
+ }
}
-
/* before IB to rendering cmds: */
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
- struct fd_ringbuffer *ring = batch->gmem;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
-
- OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
- OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
- A5XX_RB_CNTL_HEIGHT(gmem->bin_h));
-
- emit_zs(ring, pfb->zsbuf, gmem);
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);
-
- enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);
-
- OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
- OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
- OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
- COND(samples == MSAA_ONE, A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));
-
- OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
- OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
- OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
- COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));
-
-
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
- OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
- OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
- COND(samples == MSAA_ONE, A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
+ struct fd_ringbuffer *ring = batch->gmem;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
+ OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+ OUT_RING(ring,
+ A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));
+
+ emit_zs(ring, pfb->zsbuf, gmem);
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);
+
+ enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);
+
+ OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
+ OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
+ OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
+ COND(samples == MSAA_ONE,
+ A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));
+
+ OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
+ OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
+ OUT_RING(ring,
+ A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
+ COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
+ OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
+ OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
+ COND(samples == MSAA_ONE,
+ A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
}
-
/*
* transfer from gmem to system memory (ie. normal RAM)
*/
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
- struct pipe_surface *psurf, enum a5xx_blit_buf buf)
+ struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct fd_resource *rsc = fd_resource(psurf->texture);
- struct fdl_slice *slice;
- bool tiled;
- uint32_t offset, pitch;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+ struct fdl_slice *slice;
+ bool tiled;
+ uint32_t offset, pitch;
- if (!rsc->valid)
- return;
+ if (!rsc->valid)
+ return;
- if (buf == BLIT_S)
- rsc = rsc->stencil;
+ if (buf == BLIT_S)
+ rsc = rsc->stencil;
- slice = fd_resource_slice(rsc, psurf->u.tex.level);
- offset = fd_resource_offset(rsc, psurf->u.tex.level,
- psurf->u.tex.first_layer);
- pitch = fd_resource_pitch(rsc, psurf->u.tex.level);
+ slice = fd_resource_slice(rsc, psurf->u.tex.level);
+ offset =
+ fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
+ pitch = fd_resource_pitch(rsc, psurf->u.tex.level);
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+ debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
- OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
- OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
- OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
- OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
- OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */
+ OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
+ OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
+ OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
+ OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
+ OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */
- tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
+ tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
- OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
- OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
- COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
- OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
- OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
- OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));
+ OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
+ OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
+ COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
+ OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
+ OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
+ OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));
- OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
- OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));
+ OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));
-// bool msaa_resolve = pfb->samples > 1;
- bool msaa_resolve = false;
- OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
- OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));
+ // bool msaa_resolve = pfb->samples > 1;
+ bool msaa_resolve = false;
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+ OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));
- fd5_emit_blit(batch, ring);
+ fd5_emit_blit(batch, ring);
}
static void
fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
-
- if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
-
- if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
- emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
- if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
- emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
- }
-
- if (batch->resolve & FD_BUFFER_COLOR) {
- unsigned i;
- for (i = 0; i < pfb->nr_cbufs; i++) {
- if (!pfb->cbufs[i])
- continue;
- if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
- continue;
- emit_gmem2mem_surf(batch, gmem->cbuf_base[i],
- pfb->cbufs[i], BLIT_MRT0 + i);
- }
- }
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
+ if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+
+ if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
+ emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
+ if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
+ emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
+ }
+
+ if (batch->resolve & FD_BUFFER_COLOR) {
+ unsigned i;
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ if (!pfb->cbufs[i])
+ continue;
+ if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+ emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
+ BLIT_MRT0 + i);
+ }
+ }
}
static void
-fd5_emit_tile_fini(struct fd_batch *batch)
- assert_dt
+fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_ringbuffer *ring = batch->gmem;
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x0);
- fd5_emit_lrz_flush(batch, ring);
+ fd5_emit_lrz_flush(batch, ring);
- fd5_cache_flush(batch, ring);
- fd5_set_render_mode(batch->ctx, ring, BYPASS);
+ fd5_cache_flush(batch, ring);
+ fd5_set_render_mode(batch->ctx, ring, BYPASS);
}
static void
-fd5_emit_sysmem_prep(struct fd_batch *batch)
- assert_dt
+fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_ringbuffer *ring = batch->gmem;
- fd5_emit_restore(batch, ring);
+ fd5_emit_restore(batch, ring);
- fd5_emit_lrz_flush(batch, ring);
+ fd5_emit_lrz_flush(batch, ring);
- if (batch->prologue)
- fd5_emit_ib(ring, batch->prologue);
+ if (batch->prologue)
+ fd5_emit_ib(ring, batch->prologue);
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x0);
- fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
+ fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
- OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
- OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */
+ OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
+ OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */
- OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
- OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */
+ OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
+ OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */
- /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
- fd_wfi(batch, ring);
- OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
- OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */
+ /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
+ fd_wfi(batch, ring);
+ OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
+ OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */
- OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
- OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) |
- A5XX_RB_CNTL_HEIGHT(0) |
- A5XX_RB_CNTL_BYPASS);
+ OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |
+ A5XX_RB_CNTL_BYPASS);
- /* remaining setup below here does not apply to blit/compute: */
- if (batch->nondraw)
- return;
+ /* remaining setup below here does not apply to blit/compute: */
+ if (batch->nondraw)
+ return;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
- OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
- A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
- OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
- A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+ A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
+ A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
- OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
- OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) |
- A5XX_RB_RESOLVE_CNTL_1_Y(0));
- OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
- A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));
+ OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
+ A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));
- OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
- OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) |
- A5XX_RB_WINDOW_OFFSET_Y(0));
+ OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));
- /* Enable stream output, since there's no binning pass to put it in. */
- OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
- OUT_RING(ring, 0);
+ /* Enable stream output, since there's no binning pass to put it in. */
+ OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
+ OUT_RING(ring, 0);
- OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
- OUT_RING(ring, 0x1);
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x1);
- patch_draws(batch, IGNORE_VISIBILITY);
+ patch_draws(batch, IGNORE_VISIBILITY);
- emit_zs(ring, pfb->zsbuf, NULL);
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
+ emit_zs(ring, pfb->zsbuf, NULL);
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
- OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
- OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
- OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
- A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);
+ OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
+ OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
+ OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
+ A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);
- OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
- OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
- OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
- A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);
+ OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
+ OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
+ OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
+ A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
- OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
- OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
- A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
+ OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
+ OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
+ A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);
}
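The RB_CCU_CNTL comment above records the two known-good values; a hedged helper (hypothetical name, not a driver API) just makes the mode dependence explicit:

#include <stdbool.h>
#include <stdint.h>

/* Values taken from the comment above; their bitfields are not
 * documented here, so treat them as opaque per-mode constants: */
static uint32_t
a5xx_ccu_cntl_sketch(bool gmem)
{
   return gmem ? 0x7c13c080 : 0x10000000; /* GMEM vs BYPASS */
}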
static void
fd5_emit_sysmem_fini(struct fd_batch *batch)
{
- struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_ringbuffer *ring = batch->gmem;
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x0);
- fd5_emit_lrz_flush(batch, ring);
+ fd5_emit_lrz_flush(batch, ring);
- fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
- fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
+ fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+ fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
}
void
-fd5_gmem_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
-
- ctx->emit_tile_init = fd5_emit_tile_init;
- ctx->emit_tile_prep = fd5_emit_tile_prep;
- ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
- ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
- ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
- ctx->emit_tile_fini = fd5_emit_tile_fini;
- ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
- ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
+ struct fd_context *ctx = fd_context(pctx);
+
+ ctx->emit_tile_init = fd5_emit_tile_init;
+ ctx->emit_tile_prep = fd5_emit_tile_prep;
+ ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
+ ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
+ ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
+ ctx->emit_tile_fini = fd5_emit_tile_fini;
+ ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
+ ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
}
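For orientation, these hooks are dispatched by the generation-independent tiling code; a rough sketch of the assumed call order (the real loop in freedreno_gmem.c also handles binning, per-tile scissors, and queries):

/* Pseudocode-level sketch with assumed signatures, not the actual
 * freedreno_gmem.c implementation: */
static void
render_tiles_sketch(struct fd_batch *batch,
                    const struct fd_gmem_stateobj *gmem)
{
   struct fd_context *ctx = batch->ctx;

   ctx->emit_tile_init(batch);
   for (unsigned i = 0; i < gmem->nbins_x * gmem->nbins_y; i++) {
      const struct fd_tile *tile = &gmem->tile[i];

      ctx->emit_tile_prep(batch, tile);     /* per-bin setup */
      ctx->emit_tile_mem2gmem(batch, tile); /* restore, if needed */
      ctx->emit_tile_renderprep(batch, tile);
      /* ... replay batch->draw commands ... */
      ctx->emit_tile_gmem2mem(batch, tile); /* resolve to sysmem */
   }
   ctx->emit_tile_fini(batch);
}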
#include "pipe/p_state.h"
-#include "freedreno_resource.h"
-#include "fd5_image.h"
#include "fd5_format.h"
+#include "fd5_image.h"
#include "fd5_texture.h"
+#include "freedreno_resource.h"
static enum a4xx_state_block texsb[] = {
- [PIPE_SHADER_COMPUTE] = SB4_CS_TEX,
- [PIPE_SHADER_FRAGMENT] = SB4_FS_TEX,
+ [PIPE_SHADER_COMPUTE] = SB4_CS_TEX,
+ [PIPE_SHADER_FRAGMENT] = SB4_FS_TEX,
};
static enum a4xx_state_block imgsb[] = {
- [PIPE_SHADER_COMPUTE] = SB4_CS_SSBO,
- [PIPE_SHADER_FRAGMENT] = SB4_SSBO,
+ [PIPE_SHADER_COMPUTE] = SB4_CS_SSBO,
+ [PIPE_SHADER_FRAGMENT] = SB4_SSBO,
};
struct fd5_image {
- enum pipe_format pfmt;
- enum a5xx_tex_fmt fmt;
- enum a5xx_tex_type type;
- bool srgb;
- uint32_t cpp;
- uint32_t width;
- uint32_t height;
- uint32_t depth;
- uint32_t pitch;
- uint32_t array_pitch;
- struct fd_bo *bo;
- uint32_t offset;
- bool buffer;
+ enum pipe_format pfmt;
+ enum a5xx_tex_fmt fmt;
+ enum a5xx_tex_type type;
+ bool srgb;
+ uint32_t cpp;
+ uint32_t width;
+ uint32_t height;
+ uint32_t depth;
+ uint32_t pitch;
+ uint32_t array_pitch;
+ struct fd_bo *bo;
+ uint32_t offset;
+ bool buffer;
};
-static void translate_image(struct fd5_image *img, struct pipe_image_view *pimg)
+static void
+translate_image(struct fd5_image *img, struct pipe_image_view *pimg)
{
- enum pipe_format format = pimg->format;
- struct pipe_resource *prsc = pimg->resource;
- struct fd_resource *rsc = fd_resource(prsc);
-
- if (!pimg->resource) {
- memset(img, 0, sizeof(*img));
- return;
- }
-
- img->pfmt = format;
- img->fmt = fd5_pipe2tex(format);
- img->type = fd5_tex_type(prsc->target);
- img->srgb = util_format_is_srgb(format);
- img->cpp = rsc->layout.cpp;
- img->bo = rsc->bo;
-
- /* Treat cube textures as 2d-array: */
- if (img->type == A5XX_TEX_CUBE)
- img->type = A5XX_TEX_2D;
-
- if (prsc->target == PIPE_BUFFER) {
- img->buffer = true;
- img->offset = pimg->u.buf.offset;
- img->pitch = 0;
- img->array_pitch = 0;
-
- /* size is encoded with low 15b in WIDTH and high bits in
- * HEIGHT, in units of elements:
- */
- unsigned sz = pimg->u.buf.size / util_format_get_blocksize(format);
- img->width = sz & MASK(15);
- img->height = sz >> 15;
- img->depth = 0;
- } else {
- img->buffer = false;
-
- unsigned lvl = pimg->u.tex.level;
- img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
- img->pitch = fd_resource_pitch(rsc, lvl);
-
- img->width = u_minify(prsc->width0, lvl);
- img->height = u_minify(prsc->height0, lvl);
-
- unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
-
- switch (prsc->target) {
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_2D:
- img->array_pitch = rsc->layout.layer_size;
- img->depth = 1;
- break;
- case PIPE_TEXTURE_1D_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- img->array_pitch = rsc->layout.layer_size;
- img->depth = layers;
- break;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- img->array_pitch = rsc->layout.layer_size;
- img->depth = layers;
- break;
- case PIPE_TEXTURE_3D:
- img->array_pitch = fd_resource_slice(rsc, lvl)->size0;
- img->depth = u_minify(prsc->depth0, lvl);
- break;
- default:
- img->array_pitch = 0;
- img->depth = 0;
- break;
- }
- }
+ enum pipe_format format = pimg->format;
+ struct pipe_resource *prsc = pimg->resource;
+ struct fd_resource *rsc = fd_resource(prsc);
+
+ if (!pimg->resource) {
+ memset(img, 0, sizeof(*img));
+ return;
+ }
+
+ img->pfmt = format;
+ img->fmt = fd5_pipe2tex(format);
+ img->type = fd5_tex_type(prsc->target);
+ img->srgb = util_format_is_srgb(format);
+ img->cpp = rsc->layout.cpp;
+ img->bo = rsc->bo;
+
+ /* Treat cube textures as 2d-array: */
+ if (img->type == A5XX_TEX_CUBE)
+ img->type = A5XX_TEX_2D;
+
+ if (prsc->target == PIPE_BUFFER) {
+ img->buffer = true;
+ img->offset = pimg->u.buf.offset;
+ img->pitch = 0;
+ img->array_pitch = 0;
+
+ /* size is encoded with low 15b in WIDTH and high bits in
+ * HEIGHT, in units of elements:
+ */
+ unsigned sz = pimg->u.buf.size / util_format_get_blocksize(format);
+ img->width = sz & MASK(15);
+ img->height = sz >> 15;
+ img->depth = 0;
+ } else {
+ img->buffer = false;
+
+ unsigned lvl = pimg->u.tex.level;
+ img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
+ img->pitch = fd_resource_pitch(rsc, lvl);
+
+ img->width = u_minify(prsc->width0, lvl);
+ img->height = u_minify(prsc->height0, lvl);
+
+ unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
+
+ switch (prsc->target) {
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_2D:
+ img->array_pitch = rsc->layout.layer_size;
+ img->depth = 1;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ img->array_pitch = rsc->layout.layer_size;
+ img->depth = layers;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ img->array_pitch = rsc->layout.layer_size;
+ img->depth = layers;
+ break;
+ case PIPE_TEXTURE_3D:
+ img->array_pitch = fd_resource_slice(rsc, lvl)->size0;
+ img->depth = u_minify(prsc->depth0, lvl);
+ break;
+ default:
+ img->array_pitch = 0;
+ img->depth = 0;
+ break;
+ }
+ }
}
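The 15-bit WIDTH/HEIGHT split described in the PIPE_BUFFER path above round-trips as expected; a standalone check with MASK(15) expanded to its value:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   uint32_t sz = 100000;         /* element count: buf.size / blocksize */
   uint32_t width = sz & 0x7fff; /* low 15 bits, i.e. MASK(15) */
   uint32_t height = sz >> 15;   /* remaining high bits */

   /* the hw reconstructs the size as height * 2^15 + width: */
   assert(((height << 15) | width) == sz);
   return 0;
}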
-static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
- struct fd5_image *img, enum pipe_shader_type shader)
+static void
+emit_image_tex(struct fd_ringbuffer *ring, unsigned slot, struct fd5_image *img,
+ enum pipe_shader_type shader)
{
- OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 12);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(texsb[shader]) |
- CP_LOAD_STATE4_0_NUM_UNIT(1));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
-
- OUT_RING(ring, A5XX_TEX_CONST_0_FMT(img->fmt) |
- fd5_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
- PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) |
- COND(img->srgb, A5XX_TEX_CONST_0_SRGB));
- OUT_RING(ring, A5XX_TEX_CONST_1_WIDTH(img->width) |
- A5XX_TEX_CONST_1_HEIGHT(img->height));
- OUT_RING(ring,
- COND(img->buffer, A5XX_TEX_CONST_2_UNK4 | A5XX_TEX_CONST_2_UNK31) |
- A5XX_TEX_CONST_2_TYPE(img->type) |
- A5XX_TEX_CONST_2_PITCH(img->pitch));
- OUT_RING(ring, A5XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
- if (img->bo) {
- OUT_RELOC(ring, img->bo, img->offset,
- (uint64_t)A5XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
- } else {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, A5XX_TEX_CONST_5_DEPTH(img->depth));
- }
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 12);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(texsb[shader]) |
+ CP_LOAD_STATE4_0_NUM_UNIT(1));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+
+ OUT_RING(ring, A5XX_TEX_CONST_0_FMT(img->fmt) |
+ fd5_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) |
+ COND(img->srgb, A5XX_TEX_CONST_0_SRGB));
+ OUT_RING(ring, A5XX_TEX_CONST_1_WIDTH(img->width) |
+ A5XX_TEX_CONST_1_HEIGHT(img->height));
+ OUT_RING(ring,
+ COND(img->buffer, A5XX_TEX_CONST_2_UNK4 | A5XX_TEX_CONST_2_UNK31) |
+ A5XX_TEX_CONST_2_TYPE(img->type) |
+ A5XX_TEX_CONST_2_PITCH(img->pitch));
+ OUT_RING(ring, A5XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
+ if (img->bo) {
+ OUT_RELOC(ring, img->bo, img->offset,
+ (uint64_t)A5XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, A5XX_TEX_CONST_5_DEPTH(img->depth));
+ }
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
}
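A note on the packet sizing above: CP_LOAD_STATE4 takes a 3-dword header, and a texture constant record on a5xx is 12 dwords, hence the `3 + 12` for a single unit. A hedged generalization (hypothetical helper, assuming the 12-dword record size holds for any NUM_UNIT):

#include <stdint.h>

static inline uint32_t
tex_const_load_state_dwords(uint32_t num_unit)
{
   return 3 + 12 * num_unit; /* header + NUM_UNIT tex-const records */
}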
-static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
- struct fd5_image *img, enum pipe_shader_type shader)
+static void
+emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
+ struct fd5_image *img, enum pipe_shader_type shader)
{
- OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) |
- CP_LOAD_STATE4_0_NUM_UNIT(1));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
- OUT_RING(ring, A5XX_SSBO_1_0_FMT(img->fmt) |
- A5XX_SSBO_1_0_WIDTH(img->width));
- OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(img->height) |
- A5XX_SSBO_1_1_DEPTH(img->depth));
-
- OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
- CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
- CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) |
- CP_LOAD_STATE4_0_NUM_UNIT(1));
- OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
- CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
- if (img->bo) {
- OUT_RELOC(ring, img->bo, img->offset, 0, 0);
- } else {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) |
+ CP_LOAD_STATE4_0_NUM_UNIT(1));
+ OUT_RING(ring,
+ CP_LOAD_STATE4_1_STATE_TYPE(1) | CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ OUT_RING(ring,
+ A5XX_SSBO_1_0_FMT(img->fmt) | A5XX_SSBO_1_0_WIDTH(img->width));
+ OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(img->height) |
+ A5XX_SSBO_1_1_DEPTH(img->depth));
+
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) |
+ CP_LOAD_STATE4_0_NUM_UNIT(1));
+ OUT_RING(ring,
+ CP_LOAD_STATE4_1_STATE_TYPE(2) | CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ if (img->bo) {
+ OUT_RELOC(ring, img->bo, img->offset, 0, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
}
/* Emit required "SSBO" and sampler state. The sampler state is used by the
*/
void
fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum pipe_shader_type shader, const struct ir3_shader_variant *v)
+ enum pipe_shader_type shader,
+ const struct ir3_shader_variant *v)
{
- struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
- unsigned enabled_mask = so->enabled_mask;
- const struct ir3_ibo_mapping *m = &v->image_mapping;
+ struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
+ unsigned enabled_mask = so->enabled_mask;
+ const struct ir3_ibo_mapping *m = &v->image_mapping;
- while (enabled_mask) {
- unsigned index = u_bit_scan(&enabled_mask);
- struct fd5_image img;
+ while (enabled_mask) {
+ unsigned index = u_bit_scan(&enabled_mask);
+ struct fd5_image img;
- translate_image(&img, &so->si[index]);
+ translate_image(&img, &so->si[index]);
- emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader);
- emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img, shader);
- }
+ emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader);
+ emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img,
+ shader);
+ }
}
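The enabled_mask walk above relies on u_bit_scan() consuming one set bit per call, so the loop visits exactly the bound image slots; a behavior-equivalent standalone version (mesa's real helper lives in util/bitscan.h):

/* Equivalent sketch of u_bit_scan(): return the index of the lowest
 * set bit and clear it from *mask. Assumes *mask != 0. */
static inline int
bit_scan_sketch(unsigned *mask)
{
   int i = __builtin_ctz(*mask);
   *mask &= *mask - 1; /* clear the lowest set bit */
   return i;
}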
struct ir3_shader_variant;
void fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum pipe_shader_type shader, const struct ir3_shader_variant *v);
+ enum pipe_shader_type shader,
+ const struct ir3_shader_variant *v);
#endif /* FD5_IMAGE_H_ */
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "util/format/u_format.h"
#include "util/bitset.h"
+#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "freedreno_program.h"
-#include "fd5_program.h"
#include "fd5_emit.h"
-#include "fd5_texture.h"
#include "fd5_format.h"
+#include "fd5_program.h"
+#include "fd5_texture.h"
#include "ir3_cache.h"
void
fd5_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
- const struct ir3_info *si = &so->info;
- enum a4xx_state_block sb = fd4_stage2shadersb(so->type);
- enum a4xx_state_src src;
- uint32_t i, sz, *bin;
-
- if (FD_DBG(DIRECT)) {
- sz = si->sizedwords;
- src = SS4_DIRECT;
- bin = fd_bo_map(so->bo);
- } else {
- sz = 0;
- src = SS4_INDIRECT;
- bin = NULL;
- }
-
- OUT_PKT7(ring, CP_LOAD_STATE4, 3 + sz);
- OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
- CP_LOAD_STATE4_0_STATE_SRC(src) |
- CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE4_0_NUM_UNIT(so->instrlen));
- if (bin) {
- OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER));
- OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
- } else {
- OUT_RELOC(ring, so->bo, 0,
- CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0);
- }
-
- /* for how clever coverity is, it is sometimes rather dull, and
- * doesn't realize that the only case where bin==NULL, sz==0:
- */
- assume(bin || (sz == 0));
-
- for (i = 0; i < sz; i++) {
- OUT_RING(ring, bin[i]);
- }
+ const struct ir3_info *si = &so->info;
+ enum a4xx_state_block sb = fd4_stage2shadersb(so->type);
+ enum a4xx_state_src src;
+ uint32_t i, sz, *bin;
+
+ if (FD_DBG(DIRECT)) {
+ sz = si->sizedwords;
+ src = SS4_DIRECT;
+ bin = fd_bo_map(so->bo);
+ } else {
+ sz = 0;
+ src = SS4_INDIRECT;
+ bin = NULL;
+ }
+
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + sz);
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(src) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(so->instrlen));
+ if (bin) {
+ OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ } else {
+ OUT_RELOC(ring, so->bo, 0, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0);
+ }
+
+ /* for how clever coverity is, it is sometimes rather dull, and
+ * doesn't realize that the only case where bin==NULL, sz==0:
+ */
+ assume(bin || (sz == 0));
+
+ for (i = 0; i < sz; i++) {
+ OUT_RING(ring, bin[i]);
+ }
}
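To spell out the two paths above: FD_DBG(DIRECT) inlines the shader text into the ring (SS4_DIRECT, payload of sz dwords), while the default points the CP at the shader BO (SS4_INDIRECT, zero payload). A small sketch of the resulting packet size:

#include <stdbool.h>
#include <stdint.h>

static inline uint32_t
load_shader_dwords(bool direct, uint32_t sizedwords)
{
   /* 3 header dwords either way; instructions inlined only if direct: */
   return 3 + (direct ? sizedwords : 0);
}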
/* TODO maybe some of this we could pre-compute once rather than having
 * so much draw-time logic?
 */
static void
emit_stream_out(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
- struct ir3_shader_linkage *l)
+ struct ir3_shader_linkage *l)
{
- const struct ir3_stream_output_info *strmout = &v->shader->stream_output;
- unsigned ncomp[PIPE_MAX_SO_BUFFERS] = {0};
- unsigned prog[align(l->max_loc, 2) / 2];
-
- memset(prog, 0, sizeof(prog));
-
- for (unsigned i = 0; i < strmout->num_outputs; i++) {
- const struct ir3_stream_output *out = &strmout->output[i];
- unsigned k = out->register_index;
- unsigned idx;
-
- ncomp[out->output_buffer] += out->num_components;
-
- /* linkage map sorted by order frag shader wants things, so
- * a bit less ideal here..
- */
- for (idx = 0; idx < l->cnt; idx++)
- if (l->var[idx].regid == v->outputs[k].regid)
- break;
-
- debug_assert(idx < l->cnt);
-
- for (unsigned j = 0; j < out->num_components; j++) {
- unsigned c = j + out->start_component;
- unsigned loc = l->var[idx].loc + c;
- unsigned off = j + out->dst_offset; /* in dwords */
-
- if (loc & 1) {
- prog[loc/2] |= A5XX_VPC_SO_PROG_B_EN |
- A5XX_VPC_SO_PROG_B_BUF(out->output_buffer) |
- A5XX_VPC_SO_PROG_B_OFF(off * 4);
- } else {
- prog[loc/2] |= A5XX_VPC_SO_PROG_A_EN |
- A5XX_VPC_SO_PROG_A_BUF(out->output_buffer) |
- A5XX_VPC_SO_PROG_A_OFF(off * 4);
- }
- }
- }
-
- OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 12 + (2 * ARRAY_SIZE(prog)));
- OUT_RING(ring, REG_A5XX_VPC_SO_BUF_CNTL);
- OUT_RING(ring, A5XX_VPC_SO_BUF_CNTL_ENABLE |
- COND(ncomp[0] > 0, A5XX_VPC_SO_BUF_CNTL_BUF0) |
- COND(ncomp[1] > 0, A5XX_VPC_SO_BUF_CNTL_BUF1) |
- COND(ncomp[2] > 0, A5XX_VPC_SO_BUF_CNTL_BUF2) |
- COND(ncomp[3] > 0, A5XX_VPC_SO_BUF_CNTL_BUF3));
- OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(0));
- OUT_RING(ring, ncomp[0]);
- OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(1));
- OUT_RING(ring, ncomp[1]);
- OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(2));
- OUT_RING(ring, ncomp[2]);
- OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(3));
- OUT_RING(ring, ncomp[3]);
- OUT_RING(ring, REG_A5XX_VPC_SO_CNTL);
- OUT_RING(ring, A5XX_VPC_SO_CNTL_ENABLE);
- for (unsigned i = 0; i < ARRAY_SIZE(prog); i++) {
- OUT_RING(ring, REG_A5XX_VPC_SO_PROG);
- OUT_RING(ring, prog[i]);
- }
+ const struct ir3_stream_output_info *strmout = &v->shader->stream_output;
+ unsigned ncomp[PIPE_MAX_SO_BUFFERS] = {0};
+ unsigned prog[align(l->max_loc, 2) / 2];
+
+ memset(prog, 0, sizeof(prog));
+
+ for (unsigned i = 0; i < strmout->num_outputs; i++) {
+ const struct ir3_stream_output *out = &strmout->output[i];
+ unsigned k = out->register_index;
+ unsigned idx;
+
+ ncomp[out->output_buffer] += out->num_components;
+
+ /* linkage map sorted by order frag shader wants things, so
+ * a bit less ideal here..
+ */
+ for (idx = 0; idx < l->cnt; idx++)
+ if (l->var[idx].regid == v->outputs[k].regid)
+ break;
+
+ debug_assert(idx < l->cnt);
+
+ for (unsigned j = 0; j < out->num_components; j++) {
+ unsigned c = j + out->start_component;
+ unsigned loc = l->var[idx].loc + c;
+ unsigned off = j + out->dst_offset; /* in dwords */
+
+ if (loc & 1) {
+ prog[loc / 2] |= A5XX_VPC_SO_PROG_B_EN |
+ A5XX_VPC_SO_PROG_B_BUF(out->output_buffer) |
+ A5XX_VPC_SO_PROG_B_OFF(off * 4);
+ } else {
+ prog[loc / 2] |= A5XX_VPC_SO_PROG_A_EN |
+ A5XX_VPC_SO_PROG_A_BUF(out->output_buffer) |
+ A5XX_VPC_SO_PROG_A_OFF(off * 4);
+ }
+ }
+ }
+
+ OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 12 + (2 * ARRAY_SIZE(prog)));
+ OUT_RING(ring, REG_A5XX_VPC_SO_BUF_CNTL);
+ OUT_RING(ring, A5XX_VPC_SO_BUF_CNTL_ENABLE |
+ COND(ncomp[0] > 0, A5XX_VPC_SO_BUF_CNTL_BUF0) |
+ COND(ncomp[1] > 0, A5XX_VPC_SO_BUF_CNTL_BUF1) |
+ COND(ncomp[2] > 0, A5XX_VPC_SO_BUF_CNTL_BUF2) |
+ COND(ncomp[3] > 0, A5XX_VPC_SO_BUF_CNTL_BUF3));
+ OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(0));
+ OUT_RING(ring, ncomp[0]);
+ OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(1));
+ OUT_RING(ring, ncomp[1]);
+ OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(2));
+ OUT_RING(ring, ncomp[2]);
+ OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(3));
+ OUT_RING(ring, ncomp[3]);
+ OUT_RING(ring, REG_A5XX_VPC_SO_CNTL);
+ OUT_RING(ring, A5XX_VPC_SO_CNTL_ENABLE);
+ for (unsigned i = 0; i < ARRAY_SIZE(prog); i++) {
+ OUT_RING(ring, REG_A5XX_VPC_SO_PROG);
+ OUT_RING(ring, prog[i]);
+ }
}
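The prog[] packing above stores two linkage locations per dword (even locations in the A fields, odd in the B fields), with the destination offset converted from dwords to bytes; a runnable toy model:

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
   unsigned loc = 5;     /* linkage location of one streamed component */
   unsigned dst_off = 3; /* out->dst_offset + component, in dwords */

   unsigned entry = loc / 2;         /* which prog[] dword */
   bool b_half = loc & 1;            /* odd loc -> *_B_* fields */
   unsigned off_bytes = dst_off * 4; /* the OFF field takes bytes */

   printf("prog[%u], %s fields, OFF=%u\n", entry, b_half ? "B" : "A",
          off_bytes);
   return 0;
}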
struct stage {
- const struct ir3_shader_variant *v;
- const struct ir3_info *i;
- /* const sizes are in units of 4 * vec4 */
- uint8_t constoff;
- uint8_t constlen;
- /* instr sizes are in units of 16 instructions */
- uint8_t instroff;
- uint8_t instrlen;
+ const struct ir3_shader_variant *v;
+ const struct ir3_info *i;
+ /* const sizes are in units of 4 * vec4 */
+ uint8_t constoff;
+ uint8_t constlen;
+ /* instr sizes are in units of 16 instructions */
+ uint8_t instroff;
+ uint8_t instrlen;
};
-enum {
- VS = 0,
- FS = 1,
- HS = 2,
- DS = 3,
- GS = 4,
- MAX_STAGES
-};
+enum { VS = 0, FS = 1, HS = 2, DS = 3, GS = 4, MAX_STAGES };
static void
setup_stages(struct fd5_emit *emit, struct stage *s)
{
- unsigned i;
-
- s[VS].v = fd5_emit_get_vp(emit);
- s[FS].v = fd5_emit_get_fp(emit);
-
- s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */
-
- for (i = 0; i < MAX_STAGES; i++) {
- if (s[i].v) {
- s[i].i = &s[i].v->info;
- /* constlen is in units of 4 * vec4: */
- assert(s[i].v->constlen % 4 == 0);
- s[i].constlen = s[i].v->constlen / 4;
- /* instrlen is already in units of 16 instr.. although
- * probably we should ditch that and not make the compiler
- * care about instruction group size of a3xx vs a5xx
- */
- s[i].instrlen = s[i].v->instrlen;
- } else {
- s[i].i = NULL;
- s[i].constlen = 0;
- s[i].instrlen = 0;
- }
- }
-
- /* NOTE: at least for gles2, blob partitions VS at bottom of const
- * space and FS taking entire remaining space. We probably don't
- * need to do that the same way, but for now mimic what the blob
- * does to make it easier to diff against register values from blob
- *
- * NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders
- * is run from external memory.
- */
- if ((s[VS].instrlen + s[FS].instrlen) > 64) {
- /* prioritize FS for internal memory: */
- if (s[FS].instrlen < 64) {
- /* if FS can fit, kick VS out to external memory: */
- s[VS].instrlen = 0;
- } else if (s[VS].instrlen < 64) {
- /* otherwise if VS can fit, kick out FS: */
- s[FS].instrlen = 0;
- } else {
- /* neither can fit, run both from external memory: */
- s[VS].instrlen = 0;
- s[FS].instrlen = 0;
- }
- }
-
- unsigned constoff = 0;
- for (i = 0; i < MAX_STAGES; i++) {
- s[i].constoff = constoff;
- constoff += s[i].constlen;
- }
-
- s[VS].instroff = 0;
- s[FS].instroff = 64 - s[FS].instrlen;
- s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
+ unsigned i;
+
+ s[VS].v = fd5_emit_get_vp(emit);
+ s[FS].v = fd5_emit_get_fp(emit);
+
+ s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */
+
+ for (i = 0; i < MAX_STAGES; i++) {
+ if (s[i].v) {
+ s[i].i = &s[i].v->info;
+ /* constlen is in units of 4 * vec4: */
+ assert(s[i].v->constlen % 4 == 0);
+ s[i].constlen = s[i].v->constlen / 4;
+ /* instrlen is already in units of 16 instr.. although
+ * probably we should ditch that and not make the compiler
+ * care about instruction group size of a3xx vs a5xx
+ */
+ s[i].instrlen = s[i].v->instrlen;
+ } else {
+ s[i].i = NULL;
+ s[i].constlen = 0;
+ s[i].instrlen = 0;
+ }
+ }
+
+ /* NOTE: at least for gles2, blob partitions VS at bottom of const
+ * space and FS taking entire remaining space. We probably don't
+ * need to do that the same way, but for now mimic what the blob
+ * does to make it easier to diff against register values from blob
+ *
+ * NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders
+ * is run from external memory.
+ */
+ if ((s[VS].instrlen + s[FS].instrlen) > 64) {
+ /* prioritize FS for internal memory: */
+ if (s[FS].instrlen < 64) {
+ /* if FS can fit, kick VS out to external memory: */
+ s[VS].instrlen = 0;
+ } else if (s[VS].instrlen < 64) {
+ /* otherwise if VS can fit, kick out FS: */
+ s[FS].instrlen = 0;
+ } else {
+ /* neither can fit, run both from external memory: */
+ s[VS].instrlen = 0;
+ s[FS].instrlen = 0;
+ }
+ }
+
+ unsigned constoff = 0;
+ for (i = 0; i < MAX_STAGES; i++) {
+ s[i].constoff = constoff;
+ constoff += s[i].constlen;
+ }
+
+ s[VS].instroff = 0;
+ s[FS].instroff = 64 - s[FS].instrlen;
+ s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
}
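The instruction-memory carving above can be exercised standalone: 64 units (of 16 instructions each) of internal memory, VS packed at the bottom, FS against the top, and any stage that cannot fit set to instrlen=0 so it runs from external memory:

#include <stdio.h>

int
main(void)
{
   unsigned vs = 20, fs = 50; /* instrlen, in units of 16 instructions */

   if (vs + fs > 64) {
      if (fs < 64)
         vs = 0;      /* FS fits alone: kick VS external */
      else if (vs < 64)
         fs = 0;      /* otherwise kick FS external */
      else
         vs = fs = 0; /* neither fits */
   }

   printf("VS instroff=0 len=%u, FS instroff=%u len=%u\n", vs, 64 - fs, fs);
   return 0;
}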
static inline uint32_t
next_regid(uint32_t reg, uint32_t increment)
{
- if (VALIDREG(reg))
- return reg + increment;
- else
- return regid(63,0);
+ if (VALIDREG(reg))
+ return reg + increment;
+ else
+ return regid(63, 0);
}
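Here regid(63, 0), i.e. r63.x, acts as the "no register" sentinel: next_regid() steps past a valid base (e.g. zwcoord_regid below is the frag-coord register plus two components) and propagates the sentinel otherwise. A sketch, assuming mesa's regid(r, c) = (r << 2) | c encoding:

#include <stdint.h>

#define SENTINEL_REGID ((63u << 2) | 0) /* assumed r63.x encoding */

static inline uint32_t
next_regid_sketch(uint32_t reg, uint32_t inc)
{
   return (reg != SENTINEL_REGID) ? reg + inc : SENTINEL_REGID;
}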
void
fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd5_emit *emit)
+ struct fd5_emit *emit)
{
- struct stage s[MAX_STAGES];
- uint32_t pos_regid, psize_regid, color_regid[8];
- uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid, samp_mask_regid;
- uint32_t ij_regid[IJ_COUNT], vertex_regid, instance_regid, clip0_regid, clip1_regid;
- enum a3xx_threadsize fssz;
- uint8_t psize_loc = ~0;
- int i, j;
-
- setup_stages(emit, s);
-
- bool do_streamout = (s[VS].v->shader->stream_output.num_outputs > 0);
- uint8_t clip_mask = s[VS].v->clip_mask, cull_mask = s[VS].v->cull_mask;
- uint8_t clip_cull_mask = clip_mask | cull_mask;
-
- fssz = (s[FS].i->double_threadsize) ? FOUR_QUADS : TWO_QUADS;
-
- pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
- psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
- clip0_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_CLIP_DIST0);
- clip1_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_CLIP_DIST1);
- vertex_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
- instance_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID);
-
- if (s[FS].v->color0_mrt) {
- color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
- color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
- ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
- } else {
- color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
- color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
- color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
- color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
- color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
- color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
- color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
- color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
- }
-
- samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID);
- samp_mask_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN);
- face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE);
- coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD);
- zwcoord_regid = next_regid(coord_regid, 2);
- for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
- ij_regid[i] = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
-
- /* we could probably divide this up into things that need to be
- * emitted if frag-prog is dirty vs if vert-prog is dirty..
- */
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONFIG, 5);
- OUT_RING(ring, A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
- A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) |
- COND(s[VS].v, A5XX_HLSQ_VS_CONFIG_ENABLED));
- OUT_RING(ring, A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) |
- A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) |
- COND(s[FS].v, A5XX_HLSQ_FS_CONFIG_ENABLED));
- OUT_RING(ring, A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) |
- A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) |
- COND(s[HS].v, A5XX_HLSQ_HS_CONFIG_ENABLED));
- OUT_RING(ring, A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) |
- A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) |
- COND(s[DS].v, A5XX_HLSQ_DS_CONFIG_ENABLED));
- OUT_RING(ring, A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) |
- A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) |
- COND(s[GS].v, A5XX_HLSQ_GS_CONFIG_ENABLED));
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
- OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) |
- COND(s[VS].v && s[VS].v->has_ssbo, A5XX_HLSQ_VS_CNTL_SSBO_ENABLE));
- OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) |
- COND(s[FS].v && s[FS].v->has_ssbo, A5XX_HLSQ_FS_CNTL_SSBO_ENABLE));
- OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) |
- COND(s[HS].v && s[HS].v->has_ssbo, A5XX_HLSQ_HS_CNTL_SSBO_ENABLE));
- OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) |
- COND(s[DS].v && s[DS].v->has_ssbo, A5XX_HLSQ_DS_CNTL_SSBO_ENABLE));
- OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) |
- COND(s[GS].v && s[GS].v->has_ssbo, A5XX_HLSQ_GS_CNTL_SSBO_ENABLE));
-
- OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5);
- OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
- A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) |
- COND(s[VS].v, A5XX_SP_VS_CONFIG_ENABLED));
- OUT_RING(ring, A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) |
- A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) |
- COND(s[FS].v, A5XX_SP_FS_CONFIG_ENABLED));
- OUT_RING(ring, A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) |
- A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) |
- COND(s[HS].v, A5XX_SP_HS_CONFIG_ENABLED));
- OUT_RING(ring, A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) |
- A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) |
- COND(s[DS].v, A5XX_SP_DS_CONFIG_ENABLED));
- OUT_RING(ring, A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) |
- A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) |
- COND(s[GS].v, A5XX_SP_GS_CONFIG_ENABLED));
-
- OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONSTLEN, 2);
- OUT_RING(ring, s[VS].constlen); /* HLSQ_VS_CONSTLEN */
- OUT_RING(ring, s[VS].instrlen); /* HLSQ_VS_INSTRLEN */
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_FS_CONSTLEN, 2);
- OUT_RING(ring, s[FS].constlen); /* HLSQ_FS_CONSTLEN */
- OUT_RING(ring, s[FS].instrlen); /* HLSQ_FS_INSTRLEN */
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_HS_CONSTLEN, 2);
- OUT_RING(ring, s[HS].constlen); /* HLSQ_HS_CONSTLEN */
- OUT_RING(ring, s[HS].instrlen); /* HLSQ_HS_INSTRLEN */
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_DS_CONSTLEN, 2);
- OUT_RING(ring, s[DS].constlen); /* HLSQ_DS_CONSTLEN */
- OUT_RING(ring, s[DS].instrlen); /* HLSQ_DS_INSTRLEN */
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_GS_CONSTLEN, 2);
- OUT_RING(ring, s[GS].constlen); /* HLSQ_GS_CONSTLEN */
- OUT_RING(ring, s[GS].instrlen); /* HLSQ_GS_INSTRLEN */
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2);
- OUT_RING(ring, 0x00000000); /* HLSQ_CS_CONSTLEN */
- OUT_RING(ring, 0x00000000); /* HLSQ_CS_INSTRLEN */
-
- OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1);
- OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
- A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
- 0x6 | /* XXX seems to be always set? */
- A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
- COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
-
- /* If we have streamout, link against the real FS in the binning program,
- * rather than the dummy FS used for binning pass state, to ensure the
- * OUTLOC's match. Depending on whether we end up doing sysmem or gmem, the
- * actual streamout could happen with either the binning pass or draw pass
- * program, but the same streamout stateobj is used in either case:
- */
- const struct ir3_shader_variant *link_fs = s[FS].v;
- if (do_streamout && emit->binning_pass)
- link_fs = emit->prog->fs;
- struct ir3_shader_linkage l = {0};
- ir3_link_shaders(&l, s[VS].v, link_fs, true);
-
- uint8_t clip0_loc = l.clip0_loc;
- uint8_t clip1_loc = l.clip1_loc;
-
- OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4);
- OUT_RING(ring, ~l.varmask[0]); /* VPC_VAR[0].DISABLE */
- OUT_RING(ring, ~l.varmask[1]); /* VPC_VAR[1].DISABLE */
- OUT_RING(ring, ~l.varmask[2]); /* VPC_VAR[2].DISABLE */
- OUT_RING(ring, ~l.varmask[3]); /* VPC_VAR[3].DISABLE */
-
- /* Add stream out outputs after computing the VPC_VAR_DISABLE bitmask. */
- ir3_link_stream_out(&l, s[VS].v);
-
- /* a5xx appends pos/psize to end of the linkage map: */
- if (VALIDREG(pos_regid))
- ir3_link_add(&l, pos_regid, 0xf, l.max_loc);
-
- if (VALIDREG(psize_regid)) {
- psize_loc = l.max_loc;
- ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
- }
-
- /* Handle the case where clip/cull distances aren't read by the FS. Make
- * sure to avoid adding an output with an empty writemask if the user
- * disables all the clip distances in the API so that the slot is unused.
- */
- if (clip0_loc == 0xff && VALIDREG(clip0_regid) && (clip_cull_mask & 0xf) != 0) {
- clip0_loc = l.max_loc;
- ir3_link_add(&l, clip0_regid, clip_cull_mask & 0xf, l.max_loc);
- }
-
- if (clip1_loc == 0xff && VALIDREG(clip1_regid) && (clip_cull_mask >> 4) != 0) {
- clip1_loc = l.max_loc;
- ir3_link_add(&l, clip1_regid, clip_cull_mask >> 4, l.max_loc);
- }
-
- /* If we have stream-out, we use the full shader for binning
- * pass, rather than the optimized binning pass one, so that we
- * have all the varying outputs available for xfb. So streamout
- * state should always be derived from the non-binning pass
- * program:
- */
- if (do_streamout && !emit->binning_pass)
- emit_stream_out(ring, s[VS].v, &l);
-
- for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
- uint32_t reg = 0;
-
- OUT_PKT4(ring, REG_A5XX_SP_VS_OUT_REG(i), 1);
-
- reg |= A5XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
- reg |= A5XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
- j++;
-
- reg |= A5XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
- reg |= A5XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
- j++;
-
- OUT_RING(ring, reg);
- }
-
- for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
- uint32_t reg = 0;
-
- OUT_PKT4(ring, REG_A5XX_SP_VS_VPC_DST_REG(i), 1);
-
- reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
- reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
- reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
- reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);
-
- OUT_RING(ring, reg);
- }
-
- OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2);
- OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_LO/HI */
-
- if (s[VS].instrlen)
- fd5_emit_shader(ring, s[VS].v);
-
- // TODO depending on other bits in this reg (if any) set somewhere else?
- OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1);
- OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE));
-
- OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1);
- OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt));
-
- OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1);
- OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(l.max_loc) |
- COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) |
- 0x10000); // XXX
-
- fd5_context(ctx)->max_loc = l.max_loc;
-
- if (emit->binning_pass) {
- OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
- OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_LO */
- OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_HI */
- } else {
- OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
- OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_LO/HI */
- }
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5);
- OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) |
- A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(TWO_QUADS) |
- 0x00000880); /* XXX HLSQ_CONTROL_0 */
- OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63));
- OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
- A5XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) |
- A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(samp_mask_regid) |
- A5XX_HLSQ_CONTROL_2_REG_SIZE(ij_regid[IJ_PERSP_SIZE]));
- OUT_RING(ring,
- A5XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) |
- A5XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) |
- A5XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(ij_regid[IJ_PERSP_CENTROID]) |
- A5XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(ij_regid[IJ_LINEAR_CENTROID]));
- OUT_RING(ring, A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
- A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
- A5XX_HLSQ_CONTROL_4_REG_IJ_PERSP_SAMPLE(ij_regid[IJ_PERSP_SAMPLE]) |
- A5XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE]));
-
- OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1);
- OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
- 0x40006 | /* XXX set pretty much everywhere */
- A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
- A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
- A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
- A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
- COND(s[FS].v->need_pixlod, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
-
- OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
- OUT_RING(ring, 0x020fffff); /* XXX */
-
- OUT_PKT4(ring, REG_A5XX_VPC_GS_SIV_CNTL, 1);
- OUT_RING(ring, 0x0000ffff); /* XXX */
-
- OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
- OUT_RING(ring, 0x00000010); /* XXX */
-
- /* XXX: missing enable bits for per-sample bary linear centroid and IJ_PERSP_SIZE
- * (should be identical to a6xx)
- */
-
- OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
- OUT_RING(ring,
- CONDREG(ij_regid[IJ_PERSP_PIXEL], A5XX_GRAS_CNTL_IJ_PERSP_PIXEL) |
- CONDREG(ij_regid[IJ_PERSP_CENTROID], A5XX_GRAS_CNTL_IJ_PERSP_CENTROID) |
- COND(s[FS].v->fragcoord_compmask != 0,
- A5XX_GRAS_CNTL_COORD_MASK(s[FS].v->fragcoord_compmask) |
- A5XX_GRAS_CNTL_SIZE) |
- COND(s[FS].v->frag_face, A5XX_GRAS_CNTL_SIZE) |
- CONDREG(ij_regid[IJ_LINEAR_PIXEL], A5XX_GRAS_CNTL_SIZE));
-
- OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 2);
- OUT_RING(ring,
- CONDREG(ij_regid[IJ_PERSP_PIXEL], A5XX_RB_RENDER_CONTROL0_IJ_PERSP_PIXEL) |
- CONDREG(ij_regid[IJ_PERSP_CENTROID], A5XX_RB_RENDER_CONTROL0_IJ_PERSP_CENTROID) |
- COND(s[FS].v->fragcoord_compmask != 0,
- A5XX_RB_RENDER_CONTROL0_COORD_MASK(s[FS].v->fragcoord_compmask) |
- A5XX_RB_RENDER_CONTROL0_SIZE) |
- COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL0_SIZE) |
- CONDREG(ij_regid[IJ_LINEAR_PIXEL], A5XX_RB_RENDER_CONTROL0_SIZE));
- OUT_RING(ring,
- CONDREG(samp_mask_regid, A5XX_RB_RENDER_CONTROL1_SAMPLEMASK) |
- COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL1_FACENESS) |
- CONDREG(samp_id_regid, A5XX_RB_RENDER_CONTROL1_SAMPLEID));
-
- OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_REG(0), 8);
- for (i = 0; i < 8; i++) {
- OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
- COND(color_regid[i] & HALF_REG_ID, A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
- }
-
-
- OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
- OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) |
- A5XX_VPC_PACK_PSIZELOC(psize_loc));
-
- if (!emit->binning_pass) {
- uint32_t vinterp[8], vpsrepl[8];
-
- memset(vinterp, 0, sizeof(vinterp));
- memset(vpsrepl, 0, sizeof(vpsrepl));
-
- /* looks like we need to do int varyings in the frag
- * shader on a5xx (no flatshad reg? or a420.0 bug?):
- *
- * (sy)(ss)nop
- * (sy)ldlv.u32 r0.x,l[r0.x], 1
- * ldlv.u32 r0.y,l[r0.x+1], 1
- * (ss)bary.f (ei)r63.x, 0, r0.x
- * (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
- * (rpt5)nop
- * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
- *
- * Possibly on later a5xx variants we'll be able to use
- * something like the code below instead of workaround
- * in the shader:
- */
- /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
- for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
- /* NOTE: varyings are packed, so if compmask is 0xb
- * then first, third, and fourth component occupy
- * three consecutive varying slots:
- */
- unsigned compmask = s[FS].v->inputs[j].compmask;
-
- uint32_t inloc = s[FS].v->inputs[j].inloc;
-
- if (s[FS].v->inputs[j].flat ||
- (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
- uint32_t loc = inloc;
-
- for (i = 0; i < 4; i++) {
- if (compmask & (1 << i)) {
- vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
- //flatshade[loc / 32] |= 1 << (loc % 32);
- loc++;
- }
- }
- }
-
- bool coord_mode = emit->sprite_coord_mode;
- if (ir3_point_sprite(s[FS].v, j, emit->sprite_coord_enable, &coord_mode)) {
- /* mask is two 2-bit fields, where:
- * '01' -> S
- * '10' -> T
- * '11' -> 1 - T (flip mode)
- */
- unsigned mask = coord_mode ? 0b1101 : 0b1001;
- uint32_t loc = inloc;
- if (compmask & 0x1) {
- vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x2) {
- vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x4) {
- /* .z <- 0.0f */
- vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x8) {
- /* .w <- 1.0f */
- vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
- loc++;
- }
- }
- }
-
- OUT_PKT4(ring, REG_A5XX_VPC_VARYING_INTERP_MODE(0), 8);
- for (i = 0; i < 8; i++)
- OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
-
- OUT_PKT4(ring, REG_A5XX_VPC_VARYING_PS_REPL_MODE(0), 8);
- for (i = 0; i < 8; i++)
- OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
- }
-
- OUT_PKT4(ring, REG_A5XX_GRAS_VS_CL_CNTL, 1);
- OUT_RING(ring, A5XX_GRAS_VS_CL_CNTL_CLIP_MASK(clip_mask) |
- A5XX_GRAS_VS_CL_CNTL_CULL_MASK(cull_mask));
-
- OUT_PKT4(ring, REG_A5XX_VPC_CLIP_CNTL, 1);
- OUT_RING(ring, A5XX_VPC_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
- A5XX_VPC_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
- A5XX_VPC_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));
-
- OUT_PKT4(ring, REG_A5XX_PC_CLIP_CNTL, 1);
- OUT_RING(ring, A5XX_PC_CLIP_CNTL_CLIP_MASK(clip_mask));
-
- if (!emit->binning_pass)
- if (s[FS].instrlen)
- fd5_emit_shader(ring, s[FS].v);
-
- OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_1, 5);
- OUT_RING(ring, A5XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
- A5XX_VFD_CONTROL_1_REGID4INST(instance_regid) |
- 0xfc0000);
- OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_2 */
- OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_3 */
- OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */
- OUT_RING(ring, 0x00000000); /* VFD_CONTROL_5 */
+ struct stage s[MAX_STAGES];
+ uint32_t pos_regid, psize_regid, color_regid[8];
+ uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid,
+ samp_mask_regid;
+ uint32_t ij_regid[IJ_COUNT], vertex_regid, instance_regid, clip0_regid,
+ clip1_regid;
+ enum a3xx_threadsize fssz;
+ uint8_t psize_loc = ~0;
+ int i, j;
+
+ setup_stages(emit, s);
+
+ bool do_streamout = (s[VS].v->shader->stream_output.num_outputs > 0);
+ uint8_t clip_mask = s[VS].v->clip_mask, cull_mask = s[VS].v->cull_mask;
+ uint8_t clip_cull_mask = clip_mask | cull_mask;
+
+ fssz = (s[FS].i->double_threadsize) ? FOUR_QUADS : TWO_QUADS;
+
+ pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
+ psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
+ clip0_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_CLIP_DIST0);
+ clip1_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_CLIP_DIST1);
+ vertex_regid =
+ ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
+ instance_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID);
+
+ if (s[FS].v->color0_mrt) {
+ color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
+ color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
+ ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
+ } else {
+ color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
+ color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
+ color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
+ color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
+ color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
+ color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
+ color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
+ color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
+ }
+
+ samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID);
+ samp_mask_regid =
+ ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN);
+ face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE);
+ coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD);
+ zwcoord_regid = next_regid(coord_regid, 2);
+ for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
+ ij_regid[i] = ir3_find_sysval_regid(
+ s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
+
+ /* we could probably divide this up into things that need to be
+ * emitted if frag-prog is dirty vs if vert-prog is dirty..
+ */
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONFIG, 5);
+ OUT_RING(ring, A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
+ A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) |
+ COND(s[VS].v, A5XX_HLSQ_VS_CONFIG_ENABLED));
+ OUT_RING(ring, A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) |
+ A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) |
+ COND(s[FS].v, A5XX_HLSQ_FS_CONFIG_ENABLED));
+ OUT_RING(ring, A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) |
+ A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) |
+ COND(s[HS].v, A5XX_HLSQ_HS_CONFIG_ENABLED));
+ OUT_RING(ring, A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) |
+ A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) |
+ COND(s[DS].v, A5XX_HLSQ_DS_CONFIG_ENABLED));
+ OUT_RING(ring, A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) |
+ A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) |
+ COND(s[GS].v, A5XX_HLSQ_GS_CONFIG_ENABLED));
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
+ OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) |
+ COND(s[VS].v && s[VS].v->has_ssbo,
+ A5XX_HLSQ_VS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) |
+ COND(s[FS].v && s[FS].v->has_ssbo,
+ A5XX_HLSQ_FS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) |
+ COND(s[HS].v && s[HS].v->has_ssbo,
+ A5XX_HLSQ_HS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) |
+ COND(s[DS].v && s[DS].v->has_ssbo,
+ A5XX_HLSQ_DS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) |
+ COND(s[GS].v && s[GS].v->has_ssbo,
+ A5XX_HLSQ_GS_CNTL_SSBO_ENABLE));
+
+ OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5);
+ OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
+ A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) |
+ COND(s[VS].v, A5XX_SP_VS_CONFIG_ENABLED));
+ OUT_RING(ring, A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) |
+ A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) |
+ COND(s[FS].v, A5XX_SP_FS_CONFIG_ENABLED));
+ OUT_RING(ring, A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) |
+ A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) |
+ COND(s[HS].v, A5XX_SP_HS_CONFIG_ENABLED));
+ OUT_RING(ring, A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) |
+ A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) |
+ COND(s[DS].v, A5XX_SP_DS_CONFIG_ENABLED));
+ OUT_RING(ring, A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) |
+ A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) |
+ COND(s[GS].v, A5XX_SP_GS_CONFIG_ENABLED));
+
+ OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONSTLEN, 2);
+ OUT_RING(ring, s[VS].constlen); /* HLSQ_VS_CONSTLEN */
+ OUT_RING(ring, s[VS].instrlen); /* HLSQ_VS_INSTRLEN */
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_FS_CONSTLEN, 2);
+ OUT_RING(ring, s[FS].constlen); /* HLSQ_FS_CONSTLEN */
+ OUT_RING(ring, s[FS].instrlen); /* HLSQ_FS_INSTRLEN */
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_HS_CONSTLEN, 2);
+ OUT_RING(ring, s[HS].constlen); /* HLSQ_HS_CONSTLEN */
+ OUT_RING(ring, s[HS].instrlen); /* HLSQ_HS_INSTRLEN */
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_DS_CONSTLEN, 2);
+ OUT_RING(ring, s[DS].constlen); /* HLSQ_DS_CONSTLEN */
+ OUT_RING(ring, s[DS].instrlen); /* HLSQ_DS_INSTRLEN */
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_GS_CONSTLEN, 2);
+ OUT_RING(ring, s[GS].constlen); /* HLSQ_GS_CONSTLEN */
+ OUT_RING(ring, s[GS].instrlen); /* HLSQ_GS_INSTRLEN */
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2);
+ OUT_RING(ring, 0x00000000); /* HLSQ_CS_CONSTLEN */
+ OUT_RING(ring, 0x00000000); /* HLSQ_CS_INSTRLEN */
+
+ OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1);
+ OUT_RING(ring,
+ A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
+ A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
+ 0x6 | /* XXX seems to be always set? */
+ A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
+ COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+
+ /* If we have streamout, link against the real FS in the binning program,
+ * rather than the dummy FS used for binning pass state, to ensure the
+ * OUTLOC's match. Depending on whether we end up doing sysmem or gmem, the
+ * actual streamout could happen with either the binning pass or draw pass
+ * program, but the same streamout stateobj is used in either case:
+ */
+ const struct ir3_shader_variant *link_fs = s[FS].v;
+ if (do_streamout && emit->binning_pass)
+ link_fs = emit->prog->fs;
+ struct ir3_shader_linkage l = {0};
+ ir3_link_shaders(&l, s[VS].v, link_fs, true);
+
+ uint8_t clip0_loc = l.clip0_loc;
+ uint8_t clip1_loc = l.clip1_loc;
+
+ OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4);
+ OUT_RING(ring, ~l.varmask[0]); /* VPC_VAR[0].DISABLE */
+ OUT_RING(ring, ~l.varmask[1]); /* VPC_VAR[1].DISABLE */
+ OUT_RING(ring, ~l.varmask[2]); /* VPC_VAR[2].DISABLE */
+ OUT_RING(ring, ~l.varmask[3]); /* VPC_VAR[3].DISABLE */
+
+ /* Add stream out outputs after computing the VPC_VAR_DISABLE bitmask. */
+ ir3_link_stream_out(&l, s[VS].v);
+
+ /* a5xx appends pos/psize to end of the linkage map: */
+ if (VALIDREG(pos_regid))
+ ir3_link_add(&l, pos_regid, 0xf, l.max_loc);
+
+ if (VALIDREG(psize_regid)) {
+ psize_loc = l.max_loc;
+ ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
+ }
+
+ /* Handle the case where clip/cull distances aren't read by the FS. Make
+ * sure to avoid adding an output with an empty writemask if the user
+ * disables all the clip distances in the API so that the slot is unused.
+ */
+ if (clip0_loc == 0xff && VALIDREG(clip0_regid) &&
+ (clip_cull_mask & 0xf) != 0) {
+ clip0_loc = l.max_loc;
+ ir3_link_add(&l, clip0_regid, clip_cull_mask & 0xf, l.max_loc);
+ }
+
+ if (clip1_loc == 0xff && VALIDREG(clip1_regid) &&
+ (clip_cull_mask >> 4) != 0) {
+ clip1_loc = l.max_loc;
+ ir3_link_add(&l, clip1_regid, clip_cull_mask >> 4, l.max_loc);
+ }
+
+ /* If we have stream-out, we use the full shader for binning
+ * pass, rather than the optimized binning pass one, so that we
+ * have all the varying outputs available for xfb. So streamout
+ * state should always be derived from the non-binning pass
+ * program:
+ */
+ if (do_streamout && !emit->binning_pass)
+ emit_stream_out(ring, s[VS].v, &l);
+
+ for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
+ uint32_t reg = 0;
+
+ OUT_PKT4(ring, REG_A5XX_SP_VS_OUT_REG(i), 1);
+
+ reg |= A5XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
+ reg |= A5XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
+ j++;
+
+ reg |= A5XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
+ reg |= A5XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
+ j++;
+
+ OUT_RING(ring, reg);
+ }
+
+ for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
+ uint32_t reg = 0;
+
+ OUT_PKT4(ring, REG_A5XX_SP_VS_VPC_DST_REG(i), 1);
+
+ reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
+ reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
+ reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
+ reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);
+
+ OUT_RING(ring, reg);
+ }
+
+ OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2);
+ OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_LO/HI */
+
+ if (s[VS].instrlen)
+ fd5_emit_shader(ring, s[VS].v);
+
+ // TODO depending on other bits in this reg (if any) set somewhere else?
+ OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE));
+
+ OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1);
+ OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt));
+
+ OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1);
+ OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(l.max_loc) |
+ COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) |
+ 0x10000); // XXX
+
+ fd5_context(ctx)->max_loc = l.max_loc;
+
+ if (emit->binning_pass) {
+ OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
+ OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_LO */
+ OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_HI */
+ } else {
+ OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
+ OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_LO/HI */
+ }
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5);
+ OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) |
+ A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(TWO_QUADS) |
+ 0x00000880); /* XXX HLSQ_CONTROL_0 */
+ OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63));
+ OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
+ A5XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) |
+ A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(samp_mask_regid) |
+ A5XX_HLSQ_CONTROL_2_REG_SIZE(ij_regid[IJ_PERSP_SIZE]));
+ OUT_RING(
+ ring,
+ A5XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) |
+ A5XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) |
+ A5XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(
+ ij_regid[IJ_PERSP_CENTROID]) |
+ A5XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(
+ ij_regid[IJ_LINEAR_CENTROID]));
+ OUT_RING(
+ ring,
+ A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
+ A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
+ A5XX_HLSQ_CONTROL_4_REG_IJ_PERSP_SAMPLE(ij_regid[IJ_PERSP_SAMPLE]) |
+ A5XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE]));
+
+ OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1);
+ OUT_RING(
+ ring,
+ COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
+ 0x40006 | /* XXX set pretty much everywhere */
+ A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
+ A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
+ A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
+ A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
+ COND(s[FS].v->need_pixlod, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
+ OUT_RING(ring, 0x020fffff); /* XXX */
+
+ OUT_PKT4(ring, REG_A5XX_VPC_GS_SIV_CNTL, 1);
+ OUT_RING(ring, 0x0000ffff); /* XXX */
+
+ OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
+ OUT_RING(ring, 0x00000010); /* XXX */
+
+ /* XXX: missing enable bits for per-sample bary linear centroid and
+ * IJ_PERSP_SIZE (should be identical to a6xx)
+ */
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
+ OUT_RING(ring,
+ CONDREG(ij_regid[IJ_PERSP_PIXEL], A5XX_GRAS_CNTL_IJ_PERSP_PIXEL) |
+ CONDREG(ij_regid[IJ_PERSP_CENTROID],
+ A5XX_GRAS_CNTL_IJ_PERSP_CENTROID) |
+ COND(s[FS].v->fragcoord_compmask != 0,
+ A5XX_GRAS_CNTL_COORD_MASK(s[FS].v->fragcoord_compmask) |
+ A5XX_GRAS_CNTL_SIZE) |
+ COND(s[FS].v->frag_face, A5XX_GRAS_CNTL_SIZE) |
+ CONDREG(ij_regid[IJ_LINEAR_PIXEL], A5XX_GRAS_CNTL_SIZE));
+
+ OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 2);
+ OUT_RING(
+ ring,
+ CONDREG(ij_regid[IJ_PERSP_PIXEL],
+ A5XX_RB_RENDER_CONTROL0_IJ_PERSP_PIXEL) |
+ CONDREG(ij_regid[IJ_PERSP_CENTROID],
+ A5XX_RB_RENDER_CONTROL0_IJ_PERSP_CENTROID) |
+ COND(s[FS].v->fragcoord_compmask != 0,
+ A5XX_RB_RENDER_CONTROL0_COORD_MASK(s[FS].v->fragcoord_compmask) |
+ A5XX_RB_RENDER_CONTROL0_SIZE) |
+ COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL0_SIZE) |
+ CONDREG(ij_regid[IJ_LINEAR_PIXEL], A5XX_RB_RENDER_CONTROL0_SIZE));
+ OUT_RING(ring,
+ CONDREG(samp_mask_regid, A5XX_RB_RENDER_CONTROL1_SAMPLEMASK) |
+ COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL1_FACENESS) |
+ CONDREG(samp_id_regid, A5XX_RB_RENDER_CONTROL1_SAMPLEID));
+
+ OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_REG(0), 8);
+ for (i = 0; i < 8; i++) {
+ OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
+ COND(color_regid[i] & HALF_REG_ID,
+ A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
+ }
+
+ OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
+ OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) |
+ A5XX_VPC_PACK_PSIZELOC(psize_loc));
+
+ if (!emit->binning_pass) {
+ uint32_t vinterp[8], vpsrepl[8];
+
+ memset(vinterp, 0, sizeof(vinterp));
+ memset(vpsrepl, 0, sizeof(vpsrepl));
+
+ /* looks like we need to do int varyings in the frag
+ * shader on a5xx (no flatshade reg? or a420.0 bug?):
+ *
+ * (sy)(ss)nop
+ * (sy)ldlv.u32 r0.x,l[r0.x], 1
+ * ldlv.u32 r0.y,l[r0.x+1], 1
+ * (ss)bary.f (ei)r63.x, 0, r0.x
+ * (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
+ * (rpt5)nop
+ * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
+ *
+ * Possibly on later a5xx variants we'll be able to use
+ * something like the code below instead of the
+ * workaround in the shader:
+ */
+ /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
+ for (j = -1;
+ (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count;) {
+ /* NOTE: varyings are packed, so if compmask is 0xb
+ * then the first, second, and fourth components occupy
+ * three consecutive varying slots:
+ */
+ unsigned compmask = s[FS].v->inputs[j].compmask;
+
+ uint32_t inloc = s[FS].v->inputs[j].inloc;
+
+ if (s[FS].v->inputs[j].flat ||
+ (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
+ uint32_t loc = inloc;
+
+ for (i = 0; i < 4; i++) {
+ if (compmask & (1 << i)) {
+ vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
+ // flatshade[loc / 32] |= 1 << (loc % 32);
+ loc++;
+ }
+ }
+ }
+
+ bool coord_mode = emit->sprite_coord_mode;
+ if (ir3_point_sprite(s[FS].v, j, emit->sprite_coord_enable,
+ &coord_mode)) {
+ /* mask is two 2-bit fields, where:
+ * '01' -> S
+ * '10' -> T
+ * '11' -> 1 - T (flip mode)
+ */
+ unsigned mask = coord_mode ? 0b1101 : 0b1001;
+ uint32_t loc = inloc;
+ if (compmask & 0x1) {
+ vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x2) {
+ vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x4) {
+ /* .z <- 0.0f */
+ vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x8) {
+ /* .w <- 1.0f */
+ vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
+ loc++;
+ }
+ }
+ }
+
+ OUT_PKT4(ring, REG_A5XX_VPC_VARYING_INTERP_MODE(0), 8);
+ for (i = 0; i < 8; i++)
+ OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
+
+ OUT_PKT4(ring, REG_A5XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+ for (i = 0; i < 8; i++)
+ OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
+ }
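A worked example of the packing above, with invented values (none of these numbers are from the source):

/* A flat varying with compmask=0xb at inloc=4 occupies locs 4,5,6; each
 * loc owns a 2-bit field, so loc 4 lands in bits [9:8] of vinterp[0]:
 *
 *    vinterp[4 / 16] |= 1 << ((4 % 16) * 2);   // vinterp[0] |= 0x100
 *
 * A point-sprite coord at inloc=0 with coord_mode set uses mask 0b1101:
 *
 *    vpsrepl[0] |= 0b01 << 0;   // .x <- S
 *    vpsrepl[0] |= 0b11 << 2;   // .y <- 1 - T (flip mode)
 */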
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_VS_CL_CNTL, 1);
+ OUT_RING(ring, A5XX_GRAS_VS_CL_CNTL_CLIP_MASK(clip_mask) |
+ A5XX_GRAS_VS_CL_CNTL_CULL_MASK(cull_mask));
+
+ OUT_PKT4(ring, REG_A5XX_VPC_CLIP_CNTL, 1);
+ OUT_RING(ring, A5XX_VPC_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
+ A5XX_VPC_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
+ A5XX_VPC_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));
+
+ OUT_PKT4(ring, REG_A5XX_PC_CLIP_CNTL, 1);
+ OUT_RING(ring, A5XX_PC_CLIP_CNTL_CLIP_MASK(clip_mask));
+
+ if (!emit->binning_pass)
+ if (s[FS].instrlen)
+ fd5_emit_shader(ring, s[FS].v);
+
+ OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_1, 5);
+ OUT_RING(ring, A5XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
+ A5XX_VFD_CONTROL_1_REGID4INST(instance_regid) | 0xfc0000);
+ OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_2 */
+ OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_3 */
+ OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */
+ OUT_RING(ring, 0x00000000); /* VFD_CONTROL_5 */
}
static struct ir3_program_state *
fd5_program_create(void *data, struct ir3_shader_variant *bs,
- struct ir3_shader_variant *vs,
- struct ir3_shader_variant *hs,
- struct ir3_shader_variant *ds,
- struct ir3_shader_variant *gs,
- struct ir3_shader_variant *fs,
- const struct ir3_shader_key *key)
- in_dt
+ struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
+ struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
+ struct ir3_shader_variant *fs,
+ const struct ir3_shader_key *key) in_dt
{
- struct fd_context *ctx = fd_context(data);
- struct fd5_program_state *state = CALLOC_STRUCT(fd5_program_state);
+ struct fd_context *ctx = fd_context(data);
+ struct fd5_program_state *state = CALLOC_STRUCT(fd5_program_state);
- tc_assert_driver_thread(ctx->tc);
+ tc_assert_driver_thread(ctx->tc);
- state->bs = bs;
- state->vs = vs;
- state->fs = fs;
+ state->bs = bs;
+ state->vs = vs;
+ state->fs = fs;
- return &state->base;
+ return &state->base;
}
static void
fd5_program_destroy(void *data, struct ir3_program_state *state)
{
- struct fd5_program_state *so = fd5_program_state(state);
- free(so);
+ struct fd5_program_state *so = fd5_program_state(state);
+ free(so);
}
static const struct ir3_cache_funcs cache_funcs = {
- .create_state = fd5_program_create,
- .destroy_state = fd5_program_destroy,
+ .create_state = fd5_program_create,
+ .destroy_state = fd5_program_destroy,
};
void
fd5_prog_init(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
- ir3_prog_init(pctx);
- fd_prog_init(pctx);
+ ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
+ ir3_prog_init(pctx);
+ fd_prog_init(pctx);
}
struct fd5_emit;
struct fd5_program_state {
- struct ir3_program_state base;
- struct ir3_shader_variant *bs; /* VS for when emit->binning */
- struct ir3_shader_variant *vs;
- struct ir3_shader_variant *fs; /* FS for when !emit->binning */
+ struct ir3_program_state base;
+ struct ir3_shader_variant *bs; /* VS for when emit->binning */
+ struct ir3_shader_variant *vs;
+ struct ir3_shader_variant *fs; /* FS for when !emit->binning */
};
static inline struct fd5_program_state *
fd5_program_state(struct ir3_program_state *state)
{
- return (struct fd5_program_state *)state;
+ return (struct fd5_program_state *)state;
}
-void fd5_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so);
+void fd5_emit_shader(struct fd_ringbuffer *ring,
+ const struct ir3_shader_variant *so);
void fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd5_emit *emit);
+ struct fd5_emit *emit);
void fd5_prog_init(struct pipe_context *pctx);
#include "fd5_query.h"
struct PACKED fd5_query_sample {
- uint64_t start;
- uint64_t result;
- uint64_t stop;
+ uint64_t start;
+ uint64_t result;
+ uint64_t stop;
};
/* offset of a single field of an array of fd5_query_sample: */
-#define query_sample_idx(aq, idx, field) \
- fd_resource((aq)->prsc)->bo, \
- (idx * sizeof(struct fd5_query_sample)) + \
- offsetof(struct fd5_query_sample, field), \
- 0, 0
+#define query_sample_idx(aq, idx, field) \
+ fd_resource((aq)->prsc)->bo, \
+ (idx * sizeof(struct fd5_query_sample)) + \
+ offsetof(struct fd5_query_sample, field), \
+ 0, 0
/* offset of a single field of fd5_query_sample: */
-#define query_sample(aq, field) \
- query_sample_idx(aq, 0, field)
+#define query_sample(aq, field) query_sample_idx(aq, 0, field)
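A usage sketch for these macros; the expansion shown is paraphrased rather than verbatim:

/* query_sample() supplies the (bo, offset, orval, shift) argument tail
 * that OUT_RELOC() expects, so:
 *
 *    OUT_RELOC(ring, query_sample(aq, start));
 *
 * expands to, roughly:
 *
 *    OUT_RELOC(ring, fd_resource(aq->prsc)->bo,
 *              0 * sizeof(struct fd5_query_sample) +
 *                 offsetof(struct fd5_query_sample, start),
 *              0, 0);
 */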
/*
 * Occlusion Query:
 */
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_ringbuffer *ring = batch->draw;
- OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
- OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
+ OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
+ OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
- OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
- OUT_RELOC(ring, query_sample(aq, start));
+ OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
+ OUT_RELOC(ring, query_sample(aq, start));
- fd5_event_write(batch, ring, ZPASS_DONE, false);
- fd_reset_wfi(batch);
+ fd5_event_write(batch, ring, ZPASS_DONE, false);
+ fd_reset_wfi(batch);
- fd5_context(batch->ctx)->samples_passed_queries++;
+ fd5_context(batch->ctx)->samples_passed_queries++;
}
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_ringbuffer *ring = batch->draw;
- OUT_PKT7(ring, CP_MEM_WRITE, 4);
- OUT_RELOC(ring, query_sample(aq, stop));
- OUT_RING(ring, 0xffffffff);
- OUT_RING(ring, 0xffffffff);
+ OUT_PKT7(ring, CP_MEM_WRITE, 4);
+ OUT_RELOC(ring, query_sample(aq, stop));
+ OUT_RING(ring, 0xffffffff);
+ OUT_RING(ring, 0xffffffff);
- OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
+ OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
- OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
- OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
+ OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
+ OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
- OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
- OUT_RELOC(ring, query_sample(aq, stop));
+ OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
+ OUT_RELOC(ring, query_sample(aq, stop));
- fd5_event_write(batch, ring, ZPASS_DONE, false);
- fd_reset_wfi(batch);
+ fd5_event_write(batch, ring, ZPASS_DONE, false);
+ fd_reset_wfi(batch);
- OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
- OUT_RING(ring, 0x00000014); // XXX
- OUT_RELOC(ring, query_sample(aq, stop));
- OUT_RING(ring, 0xffffffff);
- OUT_RING(ring, 0xffffffff);
- OUT_RING(ring, 0x00000010); // XXX
+ OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
+ OUT_RING(ring, 0x00000014); // XXX
+ OUT_RELOC(ring, query_sample(aq, stop));
+ OUT_RING(ring, 0xffffffff);
+ OUT_RING(ring, 0xffffffff);
+ OUT_RING(ring, 0x00000010); // XXX
- /* result += stop - start: */
- OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
- OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
- CP_MEM_TO_MEM_0_NEG_C);
- OUT_RELOC(ring, query_sample(aq, result)); /* dst */
- OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
- OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
- OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
+ /* result += stop - start: */
+ OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
+ OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
+ OUT_RELOC(ring, query_sample(aq, result)); /* dst */
+ OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
+ OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
+ OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
- fd5_context(batch->ctx)->samples_passed_queries--;
+ fd5_context(batch->ctx)->samples_passed_queries--;
}
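A reading of the accumulate packet above, as implied by its flags and the dst/srcA/srcB/srcC comments (not a spec quote):

/* CP_MEM_TO_MEM with DOUBLE selects 64-bit operands and NEG_C negates
 * the third source, so the CP computes:
 *
 *    *dst = *srcA + *srcB + (-*srcC);   // result = result + stop - start
 */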
static void
occlusion_counter_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd5_query_sample *sp = buf;
- result->u64 = sp->result;
+ struct fd5_query_sample *sp = buf;
+ result->u64 = sp->result;
}
static void
occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd5_query_sample *sp = buf;
- result->b = !!sp->result;
+ struct fd5_query_sample *sp = buf;
+ result->b = !!sp->result;
}
static const struct fd_acc_sample_provider occlusion_counter = {
- .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
- .size = sizeof(struct fd5_query_sample),
- .resume = occlusion_resume,
- .pause = occlusion_pause,
- .result = occlusion_counter_result,
+ .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
+ .size = sizeof(struct fd5_query_sample),
+ .resume = occlusion_resume,
+ .pause = occlusion_pause,
+ .result = occlusion_counter_result,
};
static const struct fd_acc_sample_provider occlusion_predicate = {
- .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
- .size = sizeof(struct fd5_query_sample),
- .resume = occlusion_resume,
- .pause = occlusion_pause,
- .result = occlusion_predicate_result,
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
+ .size = sizeof(struct fd5_query_sample),
+ .resume = occlusion_resume,
+ .pause = occlusion_pause,
+ .result = occlusion_predicate_result,
};
static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
- .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
- .size = sizeof(struct fd5_query_sample),
- .resume = occlusion_resume,
- .pause = occlusion_pause,
- .result = occlusion_predicate_result,
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
+ .size = sizeof(struct fd5_query_sample),
+ .resume = occlusion_resume,
+ .pause = occlusion_pause,
+ .result = occlusion_predicate_result,
};
/*
*/
static void
-timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_ringbuffer *ring = batch->draw;
- OUT_PKT7(ring, CP_EVENT_WRITE, 4);
- OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
- CP_EVENT_WRITE_0_TIMESTAMP);
- OUT_RELOC(ring, query_sample(aq, start));
- OUT_RING(ring, 0x00000000);
+ OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+ OUT_RING(ring,
+ CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
+ OUT_RELOC(ring, query_sample(aq, start));
+ OUT_RING(ring, 0x00000000);
- fd_reset_wfi(batch);
+ fd_reset_wfi(batch);
}
static void
-timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->draw;
-
- OUT_PKT7(ring, CP_EVENT_WRITE, 4);
- OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
- CP_EVENT_WRITE_0_TIMESTAMP);
- OUT_RELOC(ring, query_sample(aq, stop));
- OUT_RING(ring, 0x00000000);
-
- fd_reset_wfi(batch);
- fd_wfi(batch, ring);
-
- /* result += stop - start: */
- OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
- OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
- CP_MEM_TO_MEM_0_NEG_C);
- OUT_RELOC(ring, query_sample(aq, result)); /* dst */
- OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
- OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
- OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
+ struct fd_ringbuffer *ring = batch->draw;
+
+ OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+ OUT_RING(ring,
+ CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
+ OUT_RELOC(ring, query_sample(aq, stop));
+ OUT_RING(ring, 0x00000000);
+
+ fd_reset_wfi(batch);
+ fd_wfi(batch, ring);
+
+ /* result += stop - start: */
+ OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
+ OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
+ OUT_RELOC(ring, query_sample(aq, result)); /* dst */
+ OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
+ OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
+ OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
}
static uint64_t
ticks_to_ns(uint32_t ts)
{
- /* This is based on the 19.2MHz always-on rbbm timer.
- *
- * TODO we should probably query this value from kernel..
- */
- return ts * (1000000000 / 19200000);
+ /* This is based on the 19.2MHz always-on rbbm timer.
+ *
+ * TODO we should probably query this value from the kernel..
+ */
+ return ts * (1000000000 / 19200000);
}
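An arithmetic note on the conversion above (the numbers are mine, but easy to verify):

/* The constant folds in integer math: 1000000000 / 19200000 == 52, while
 * the true tick period is ~52.083 ns, so results under-report elapsed
 * time by ~0.16%, e.g. 19200000 ticks (one second) -> 998400000 ns.
 */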
static void
time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd5_query_sample *sp = buf;
- result->u64 = ticks_to_ns(sp->result);
+ struct fd5_query_sample *sp = buf;
+ result->u64 = ticks_to_ns(sp->result);
}
static void
timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd5_query_sample *sp = buf;
- result->u64 = ticks_to_ns(sp->result);
+ struct fd5_query_sample *sp = buf;
+ result->u64 = ticks_to_ns(sp->result);
}
static const struct fd_acc_sample_provider time_elapsed = {
- .query_type = PIPE_QUERY_TIME_ELAPSED,
- .always = true,
- .size = sizeof(struct fd5_query_sample),
- .resume = timestamp_resume,
- .pause = timestamp_pause,
- .result = time_elapsed_accumulate_result,
+ .query_type = PIPE_QUERY_TIME_ELAPSED,
+ .always = true,
+ .size = sizeof(struct fd5_query_sample),
+ .resume = timestamp_resume,
+ .pause = timestamp_pause,
+ .result = time_elapsed_accumulate_result,
};
/* NOTE: timestamp query isn't going to give terribly sensible results
*/
static const struct fd_acc_sample_provider timestamp = {
- .query_type = PIPE_QUERY_TIMESTAMP,
- .always = true,
- .size = sizeof(struct fd5_query_sample),
- .resume = timestamp_resume,
- .pause = timestamp_pause,
- .result = timestamp_accumulate_result,
+ .query_type = PIPE_QUERY_TIMESTAMP,
+ .always = true,
+ .size = sizeof(struct fd5_query_sample),
+ .resume = timestamp_resume,
+ .pause = timestamp_pause,
+ .result = timestamp_accumulate_result,
};
/*
*/
struct fd_batch_query_entry {
- uint8_t gid; /* group-id */
- uint8_t cid; /* countable-id within the group */
+ uint8_t gid; /* group-id */
+ uint8_t cid; /* countable-id within the group */
};
struct fd_batch_query_data {
- struct fd_screen *screen;
- unsigned num_query_entries;
- struct fd_batch_query_entry query_entries[];
+ struct fd_screen *screen;
+ unsigned num_query_entries;
+ struct fd_batch_query_entry query_entries[];
};
static void
-perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
- struct fd_batch_query_data *data = aq->query_data;
- struct fd_screen *screen = data->screen;
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd_screen *screen = data->screen;
+ struct fd_ringbuffer *ring = batch->draw;
- unsigned counters_per_group[screen->num_perfcntr_groups];
- memset(counters_per_group, 0, sizeof(counters_per_group));
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
- fd_wfi(batch, ring);
+ fd_wfi(batch, ring);
- /* configure performance counters for the requested queries: */
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
- unsigned counter_idx = counters_per_group[entry->gid]++;
+ /* configure performance counters for the requested queries: */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
- debug_assert(counter_idx < g->num_counters);
+ debug_assert(counter_idx < g->num_counters);
- OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
- OUT_RING(ring, g->countables[entry->cid].selector);
- }
+ OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
+ OUT_RING(ring, g->countables[entry->cid].selector);
+ }
- memset(counters_per_group, 0, sizeof(counters_per_group));
+ memset(counters_per_group, 0, sizeof(counters_per_group));
- /* and snapshot the start values */
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
- unsigned counter_idx = counters_per_group[entry->gid]++;
- const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+ /* and snapshot the start values */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
+ const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
- OUT_PKT7(ring, CP_REG_TO_MEM, 3);
- OUT_RING(ring, CP_REG_TO_MEM_0_64B |
- CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
- OUT_RELOC(ring, query_sample_idx(aq, i, start));
- }
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, CP_REG_TO_MEM_0_64B |
+ CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
+ OUT_RELOC(ring, query_sample_idx(aq, i, start));
+ }
}
static void
-perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
- struct fd_batch_query_data *data = aq->query_data;
- struct fd_screen *screen = data->screen;
- struct fd_ringbuffer *ring = batch->draw;
-
- unsigned counters_per_group[screen->num_perfcntr_groups];
- memset(counters_per_group, 0, sizeof(counters_per_group));
-
- fd_wfi(batch, ring);
-
- /* TODO do we need to bother to turn anything off? */
-
- /* snapshot the end values: */
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
- unsigned counter_idx = counters_per_group[entry->gid]++;
- const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
-
- OUT_PKT7(ring, CP_REG_TO_MEM, 3);
- OUT_RING(ring, CP_REG_TO_MEM_0_64B |
- CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
- OUT_RELOC(ring, query_sample_idx(aq, i, stop));
- }
-
- /* and compute the result: */
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- /* result += stop - start: */
- OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
- OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
- CP_MEM_TO_MEM_0_NEG_C);
- OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
- OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
- OUT_RELOC(ring, query_sample_idx(aq, i, stop)); /* srcB */
- OUT_RELOC(ring, query_sample_idx(aq, i, start)); /* srcC */
- }
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd_screen *screen = data->screen;
+ struct fd_ringbuffer *ring = batch->draw;
+
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
+
+ fd_wfi(batch, ring);
+
+ /* TODO do we need to bother to turn anything off? */
+
+ /* snapshot the end values: */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
+ const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, CP_REG_TO_MEM_0_64B |
+ CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
+ OUT_RELOC(ring, query_sample_idx(aq, i, stop));
+ }
+
+ /* and compute the result: */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ /* result += stop - start: */
+ OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
+ OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
+ OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
+ OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
+ OUT_RELOC(ring, query_sample_idx(aq, i, stop)); /* srcB */
+ OUT_RELOC(ring, query_sample_idx(aq, i, start)); /* srcC */
+ }
}
static void
perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd_batch_query_data *data = aq->query_data;
- struct fd5_query_sample *sp = buf;
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd5_query_sample *sp = buf;
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- result->batch[i].u64 = sp[i].result;
- }
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ result->batch[i].u64 = sp[i].result;
+ }
}
static const struct fd_acc_sample_provider perfcntr = {
- .query_type = FD_QUERY_FIRST_PERFCNTR,
- .always = true,
- .resume = perfcntr_resume,
- .pause = perfcntr_pause,
- .result = perfcntr_accumulate_result,
+ .query_type = FD_QUERY_FIRST_PERFCNTR,
+ .always = true,
+ .resume = perfcntr_resume,
+ .pause = perfcntr_pause,
+ .result = perfcntr_accumulate_result,
};
static struct pipe_query *
-fd5_create_batch_query(struct pipe_context *pctx,
- unsigned num_queries, unsigned *query_types)
+fd5_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
+ unsigned *query_types)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_screen *screen = ctx->screen;
- struct fd_query *q;
- struct fd_acc_query *aq;
- struct fd_batch_query_data *data;
-
- data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data,
- num_queries * sizeof(data->query_entries[0]));
-
- data->screen = screen;
- data->num_query_entries = num_queries;
-
- /* validate the requested query_types and ensure we don't try
- * to request more query_types of a given group than we have
- * counters:
- */
- unsigned counters_per_group[screen->num_perfcntr_groups];
- memset(counters_per_group, 0, sizeof(counters_per_group));
-
- for (unsigned i = 0; i < num_queries; i++) {
- unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
-
- /* verify valid query_type, ie. is it actually a perfcntr? */
- if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
- (idx >= screen->num_perfcntr_queries)) {
- mesa_loge("invalid batch query query_type: %u", query_types[i]);
- goto error;
- }
-
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
-
- entry->gid = pq->group_id;
-
- /* the perfcntr_queries[] table flattens all the countables
- * for each group in series, ie:
- *
- * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
- *
- * So to find the countable index just step back through the
- * table to find the first entry with the same group-id.
- */
- while (pq > screen->perfcntr_queries) {
- pq--;
- if (pq->group_id == entry->gid)
- entry->cid++;
- }
-
- if (counters_per_group[entry->gid] >=
- screen->perfcntr_groups[entry->gid].num_counters) {
- mesa_loge("too many counters for group %u\n", entry->gid);
- goto error;
- }
-
- counters_per_group[entry->gid]++;
- }
-
- q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
- aq = fd_acc_query(q);
-
- /* sample buffer size is based on # of queries: */
- aq->size = num_queries * sizeof(struct fd5_query_sample);
- aq->query_data = data;
-
- return (struct pipe_query *)q;
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_screen *screen = ctx->screen;
+ struct fd_query *q;
+ struct fd_acc_query *aq;
+ struct fd_batch_query_data *data;
+
+ data = CALLOC_VARIANT_LENGTH_STRUCT(
+ fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
+
+ data->screen = screen;
+ data->num_query_entries = num_queries;
+
+ /* validate the requested query_types and ensure we don't try
+ * to request more query_types of a given group than we have
+ * counters:
+ */
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
+
+ for (unsigned i = 0; i < num_queries; i++) {
+ unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
+
+ /* verify valid query_type, ie. is it actually a perfcntr? */
+ if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
+ (idx >= screen->num_perfcntr_queries)) {
+ mesa_loge("invalid batch query query_type: %u", query_types[i]);
+ goto error;
+ }
+
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
+
+ entry->gid = pq->group_id;
+
+ /* the perfcntr_queries[] table flattens all the countables
+ * for each group in series, ie:
+ *
+ * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
+ *
+ * So to find the countable index just step back through the
+ * table to find the first entry with the same group-id.
+ */
+ while (pq > screen->perfcntr_queries) {
+ pq--;
+ if (pq->group_id == entry->gid)
+ entry->cid++;
+ }
+
+ if (counters_per_group[entry->gid] >=
+ screen->perfcntr_groups[entry->gid].num_counters) {
+ mesa_loge("too many counters for group %u\n", entry->gid);
+ goto error;
+ }
+
+ counters_per_group[entry->gid]++;
+ }
+
+ q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
+ aq = fd_acc_query(q);
+
+ /* sample buffer size is based on # of queries: */
+ aq->size = num_queries * sizeof(struct fd5_query_sample);
+ aq->query_data = data;
+
+ return (struct pipe_query *)q;
error:
- free(data);
- return NULL;
+ free(data);
+ return NULL;
}
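A worked example of the countable-index recovery above, using a hypothetical table:

/* With perfcntr_queries[] = (G0,C0), (G0,C1), (G1,C0) and idx == 1:
 * entry->gid = 0, and stepping back finds one earlier entry with the
 * same group_id, so entry->cid (zeroed by the CALLOC above) ends up
 * as 1 -- i.e. (G0,C1), as expected.
 */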
void
-fd5_query_context_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd5_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- ctx->create_query = fd_acc_create_query;
- ctx->query_update_batch = fd_acc_query_update_batch;
+ ctx->create_query = fd_acc_create_query;
+ ctx->query_update_batch = fd_acc_query_update_batch;
- pctx->create_batch_query = fd5_create_batch_query;
+ pctx->create_batch_query = fd5_create_batch_query;
- fd_acc_query_register_provider(pctx, &occlusion_counter);
- fd_acc_query_register_provider(pctx, &occlusion_predicate);
- fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
+ fd_acc_query_register_provider(pctx, &occlusion_counter);
+ fd_acc_query_register_provider(pctx, &occlusion_predicate);
+ fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
- fd_acc_query_register_provider(pctx, &time_elapsed);
- fd_acc_query_register_provider(pctx, &timestamp);
+ fd_acc_query_register_provider(pctx, &time_elapsed);
+ fd_acc_query_register_provider(pctx, &timestamp);
}
* Rob Clark <robclark@freedesktop.org>
*/
-
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd5_rasterizer.h"
#include "fd5_context.h"
#include "fd5_format.h"
+#include "fd5_rasterizer.h"
void *
fd5_rasterizer_state_create(struct pipe_context *pctx,
- const struct pipe_rasterizer_state *cso)
+ const struct pipe_rasterizer_state *cso)
{
- struct fd5_rasterizer_stateobj *so;
- float psize_min, psize_max;
+ struct fd5_rasterizer_stateobj *so;
+ float psize_min, psize_max;
- so = CALLOC_STRUCT(fd5_rasterizer_stateobj);
- if (!so)
- return NULL;
+ so = CALLOC_STRUCT(fd5_rasterizer_stateobj);
+ if (!so)
+ return NULL;
- so->base = *cso;
+ so->base = *cso;
- if (cso->point_size_per_vertex) {
- psize_min = util_get_min_point_size(cso);
- psize_max = 4092;
- } else {
- /* Force the point size to be as if the vertex output was disabled. */
- psize_min = cso->point_size;
- psize_max = cso->point_size;
- }
+ if (cso->point_size_per_vertex) {
+ psize_min = util_get_min_point_size(cso);
+ psize_max = 4092;
+ } else {
+ /* Force the point size to be as if the vertex output was disabled. */
+ psize_min = cso->point_size;
+ psize_max = cso->point_size;
+ }
- so->gras_su_point_minmax =
- A5XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
- A5XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
- so->gras_su_point_size = A5XX_GRAS_SU_POINT_SIZE(cso->point_size);
- so->gras_su_poly_offset_scale =
- A5XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
- so->gras_su_poly_offset_offset =
- A5XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
- so->gras_su_poly_offset_clamp =
- A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp);
+ so->gras_su_point_minmax = A5XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
+ A5XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
+ so->gras_su_point_size = A5XX_GRAS_SU_POINT_SIZE(cso->point_size);
+ so->gras_su_poly_offset_scale =
+ A5XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
+ so->gras_su_poly_offset_offset =
+ A5XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
+ so->gras_su_poly_offset_clamp =
+ A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp);
- so->gras_su_cntl =
- A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width/2.0);
- so->pc_raster_cntl =
- A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
- A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+ so->gras_su_cntl = A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width / 2.0);
+ so->pc_raster_cntl =
+ A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(
+ fd_polygon_mode(cso->fill_front)) |
+ A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
- if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
- cso->fill_back != PIPE_POLYGON_MODE_FILL)
- so->pc_raster_cntl |= A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE;
+ if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
+ cso->fill_back != PIPE_POLYGON_MODE_FILL)
+ so->pc_raster_cntl |= A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE;
- if (cso->cull_face & PIPE_FACE_FRONT)
- so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_FRONT;
- if (cso->cull_face & PIPE_FACE_BACK)
- so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_BACK;
- if (!cso->front_ccw)
- so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_FRONT_CW;
- if (cso->offset_tri)
- so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_POLY_OFFSET;
+ if (cso->cull_face & PIPE_FACE_FRONT)
+ so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_FRONT;
+ if (cso->cull_face & PIPE_FACE_BACK)
+ so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_BACK;
+ if (!cso->front_ccw)
+ so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_FRONT_CW;
+ if (cso->offset_tri)
+ so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_POLY_OFFSET;
- if (!cso->flatshade_first)
- so->pc_primitive_cntl |= A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST;
+ if (!cso->flatshade_first)
+ so->pc_primitive_cntl |= A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST;
-// if (!cso->depth_clip)
-// so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
-// A5XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
- if (cso->clip_halfz)
- so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z;
+ // if (!cso->depth_clip)
+ //    so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
+ //       A5XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
+ if (cso->clip_halfz)
+ so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z;
- return so;
+ return so;
}
#ifndef FD5_RASTERIZER_H_
#define FD5_RASTERIZER_H_
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
struct fd5_rasterizer_stateobj {
- struct pipe_rasterizer_state base;
-
- uint32_t gras_su_point_minmax;
- uint32_t gras_su_point_size;
- uint32_t gras_su_poly_offset_scale;
- uint32_t gras_su_poly_offset_offset;
- uint32_t gras_su_poly_offset_clamp;
-
- uint32_t gras_su_cntl;
- uint32_t gras_cl_clip_cntl;
- uint32_t pc_primitive_cntl;
- uint32_t pc_raster_cntl;
+ struct pipe_rasterizer_state base;
+
+ uint32_t gras_su_point_minmax;
+ uint32_t gras_su_point_size;
+ uint32_t gras_su_poly_offset_scale;
+ uint32_t gras_su_poly_offset_offset;
+ uint32_t gras_su_poly_offset_clamp;
+
+ uint32_t gras_su_cntl;
+ uint32_t gras_cl_clip_cntl;
+ uint32_t pc_primitive_cntl;
+ uint32_t pc_raster_cntl;
};
static inline struct fd5_rasterizer_stateobj *
fd5_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
- return (struct fd5_rasterizer_stateobj *)rast;
+ return (struct fd5_rasterizer_stateobj *)rast;
}
-void * fd5_rasterizer_state_create(struct pipe_context *pctx,
- const struct pipe_rasterizer_state *cso);
+void *fd5_rasterizer_state_create(struct pipe_context *pctx,
+ const struct pipe_rasterizer_state *cso);
#endif /* FD5_RASTERIZER_H_ */
static void
setup_lrz(struct fd_resource *rsc)
{
- struct fd_screen *screen = fd_screen(rsc->b.b.screen);
- const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
- DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
- unsigned lrz_pitch = align(DIV_ROUND_UP(rsc->b.b.width0, 8), 64);
- unsigned lrz_height = DIV_ROUND_UP(rsc->b.b.height0, 8);
+ struct fd_screen *screen = fd_screen(rsc->b.b.screen);
+ const uint32_t flags =
+ DRM_FREEDRENO_GEM_CACHE_WCOMBINE | DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
+ unsigned lrz_pitch = align(DIV_ROUND_UP(rsc->b.b.width0, 8), 64);
+ unsigned lrz_height = DIV_ROUND_UP(rsc->b.b.height0, 8);
- /* LRZ buffer is super-sampled: */
- switch (rsc->b.b.nr_samples) {
- case 4:
- lrz_pitch *= 2;
- FALLTHROUGH;
- case 2:
- lrz_height *= 2;
- }
+ /* LRZ buffer is super-sampled: */
+ switch (rsc->b.b.nr_samples) {
+ case 4:
+ lrz_pitch *= 2;
+ FALLTHROUGH;
+ case 2:
+ lrz_height *= 2;
+ }
- unsigned size = lrz_pitch * lrz_height * 2;
+ unsigned size = lrz_pitch * lrz_height * 2;
- size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
+ size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
- rsc->lrz_height = lrz_height;
- rsc->lrz_width = lrz_pitch;
- rsc->lrz_pitch = lrz_pitch;
- rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz");
+ rsc->lrz_height = lrz_height;
+ rsc->lrz_width = lrz_pitch;
+ rsc->lrz_pitch = lrz_pitch;
+ rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz");
}
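A sizing example for the allocation above (illustrative resolution, not from the source):

/* A 1920x1080 single-sampled depth buffer gives:
 *
 *    lrz_pitch  = align(DIV_ROUND_UP(1920, 8), 64) = align(240, 64) = 256
 *    lrz_height = DIV_ROUND_UP(1080, 8) = 135
 *    size       = 256 * 135 * 2 + 0x1000 = 73216 bytes
 */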
uint32_t
fd5_setup_slices(struct fd_resource *rsc)
{
- struct pipe_resource *prsc = &rsc->b.b;
+ struct pipe_resource *prsc = &rsc->b.b;
- if (FD_DBG(LRZ) && has_depth(rsc->b.b.format))
- setup_lrz(rsc);
+ if (FD_DBG(LRZ) && has_depth(rsc->b.b.format))
+ setup_lrz(rsc);
- fdl5_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
- prsc->width0, prsc->height0, prsc->depth0,
- prsc->last_level + 1, prsc->array_size,
- prsc->target == PIPE_TEXTURE_3D);
+ fdl5_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
+ prsc->width0, prsc->height0, prsc->depth0, prsc->last_level + 1,
+ prsc->array_size, prsc->target == PIPE_TEXTURE_3D);
- return rsc->layout.size;
+ return rsc->layout.size;
}
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
-#include "fd5_screen.h"
#include "fd5_blitter.h"
#include "fd5_context.h"
-#include "fd5_format.h"
#include "fd5_emit.h"
+#include "fd5_format.h"
#include "fd5_resource.h"
+#include "fd5_screen.h"
#include "ir3/ir3_compiler.h"
static bool
valid_sample_count(unsigned sample_count)
{
- switch (sample_count) {
- case 0:
- case 1:
- case 2:
- case 4:
- return true;
- default:
- return false;
- }
+ switch (sample_count) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ return true;
+ default:
+ return false;
+ }
}
static bool
fd5_screen_is_format_supported(struct pipe_screen *pscreen,
- enum pipe_format format,
- enum pipe_texture_target target,
- unsigned sample_count,
- unsigned storage_sample_count,
- unsigned usage)
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned storage_sample_count, unsigned usage)
{
- unsigned retval = 0;
-
- if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
- !valid_sample_count(sample_count)) {
- DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
- util_format_name(format), target, sample_count, usage);
- return false;
- }
-
- if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
- return false;
-
- if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
- (fd5_pipe2vtx(format) != VFMT5_NONE)) {
- retval |= PIPE_BIND_VERTEX_BUFFER;
- }
-
- if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) &&
- (fd5_pipe2tex(format) != TFMT5_NONE) &&
- (target == PIPE_BUFFER ||
- util_format_get_blocksize(format) != 12)) {
- retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE);
- }
-
- if ((usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED |
- PIPE_BIND_COMPUTE_RESOURCE)) &&
- (fd5_pipe2color(format) != RB5_NONE) &&
- (fd5_pipe2tex(format) != TFMT5_NONE)) {
- retval |= usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED |
- PIPE_BIND_COMPUTE_RESOURCE);
- }
-
- /* For ARB_framebuffer_no_attachments: */
- if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
- retval |= usage & PIPE_BIND_RENDER_TARGET;
- }
-
- if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
- (fd5_pipe2depth(format) != (enum a5xx_depth_format)~0) &&
- (fd5_pipe2tex(format) != TFMT5_NONE)) {
- retval |= PIPE_BIND_DEPTH_STENCIL;
- }
-
- if ((usage & PIPE_BIND_INDEX_BUFFER) &&
- (fd_pipe2index(format) != (enum pc_di_index_size)~0)) {
- retval |= PIPE_BIND_INDEX_BUFFER;
- }
-
- if (retval != usage) {
- DBG("not supported: format=%s, target=%d, sample_count=%d, "
- "usage=%x, retval=%x", util_format_name(format),
- target, sample_count, usage, retval);
- }
-
- return retval == usage;
+ unsigned retval = 0;
+
+ if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+ !valid_sample_count(sample_count)) {
+ DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+ util_format_name(format), target, sample_count, usage);
+ return false;
+ }
+
+ if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
+ return false;
+
+ if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
+ (fd5_pipe2vtx(format) != VFMT5_NONE)) {
+ retval |= PIPE_BIND_VERTEX_BUFFER;
+ }
+
+ if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) &&
+ (fd5_pipe2tex(format) != TFMT5_NONE) &&
+ (target == PIPE_BUFFER || util_format_get_blocksize(format) != 12)) {
+ retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE);
+ }
+
+ if ((usage &
+ (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_COMPUTE_RESOURCE)) &&
+ (fd5_pipe2color(format) != RB5_NONE) &&
+ (fd5_pipe2tex(format) != TFMT5_NONE)) {
+ retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT | PIPE_BIND_SHARED |
+ PIPE_BIND_COMPUTE_RESOURCE);
+ }
+
+ /* For ARB_framebuffer_no_attachments: */
+ if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
+ retval |= usage & PIPE_BIND_RENDER_TARGET;
+ }
+
+ if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+ (fd5_pipe2depth(format) != (enum a5xx_depth_format) ~0) &&
+ (fd5_pipe2tex(format) != TFMT5_NONE)) {
+ retval |= PIPE_BIND_DEPTH_STENCIL;
+ }
+
+ if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+ (fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
+ retval |= PIPE_BIND_INDEX_BUFFER;
+ }
+
+ if (retval != usage) {
+ DBG("not supported: format=%s, target=%d, sample_count=%d, "
+ "usage=%x, retval=%x",
+ util_format_name(format), target, sample_count, usage, retval);
+ }
+
+ return retval == usage;
}
void
fd5_screen_init(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
- screen->max_rts = A5XX_MAX_RENDER_TARGETS;
- pscreen->context_create = fd5_context_create;
- pscreen->is_format_supported = fd5_screen_is_format_supported;
+ struct fd_screen *screen = fd_screen(pscreen);
+ screen->max_rts = A5XX_MAX_RENDER_TARGETS;
+ pscreen->context_create = fd5_context_create;
+ pscreen->is_format_supported = fd5_screen_is_format_supported;
- screen->setup_slices = fd5_setup_slices;
- if (FD_DBG(TTILE))
- screen->tile_mode = fd5_tile_mode;
+ screen->setup_slices = fd5_setup_slices;
+ if (FD_DBG(TTILE))
+ screen->tile_mode = fd5_tile_mode;
- fd5_emit_init_screen(pscreen);
- ir3_screen_init(pscreen);
+ fd5_emit_init_screen(pscreen);
+ ir3_screen_init(pscreen);
}
static inline void
emit_marker5(struct fd_ringbuffer *ring, int scratch_idx)
{
- extern int32_t marker_cnt;
- unsigned reg = REG_A5XX_CP_SCRATCH_REG(scratch_idx);
- if (__EMIT_MARKER) {
- OUT_WFI5(ring);
- OUT_PKT4(ring, reg, 1);
- OUT_RING(ring, p_atomic_inc_return(&marker_cnt));
- }
+ extern int32_t marker_cnt;
+ unsigned reg = REG_A5XX_CP_SCRATCH_REG(scratch_idx);
+ if (__EMIT_MARKER) {
+ OUT_WFI5(ring);
+ OUT_PKT4(ring, reg, 1);
+ OUT_RING(ring, p_atomic_inc_return(&marker_cnt));
+ }
}
#endif /* FD5_SCREEN_H_ */
*/
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd5_texture.h"
#include "fd5_format.h"
+#include "fd5_texture.h"
static enum a5xx_tex_clamp
tex_clamp(unsigned wrap, bool *needs_border)
{
- switch (wrap) {
- case PIPE_TEX_WRAP_REPEAT:
- return A5XX_TEX_REPEAT;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return A5XX_TEX_CLAMP_TO_EDGE;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- *needs_border = true;
- return A5XX_TEX_CLAMP_TO_BORDER;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- /* only works for PoT.. need to emulate otherwise! */
- return A5XX_TEX_MIRROR_CLAMP;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- return A5XX_TEX_MIRROR_REPEAT;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- /* these two we could perhaps emulate, but we currently
- * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
- */
- default:
- DBG("invalid wrap: %u", wrap);
- return 0;
- }
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return A5XX_TEX_REPEAT;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return A5XX_TEX_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ *needs_border = true;
+ return A5XX_TEX_CLAMP_TO_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ /* only works for PoT.. need to emulate otherwise! */
+ return A5XX_TEX_MIRROR_CLAMP;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return A5XX_TEX_MIRROR_REPEAT;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ /* these two we could perhaps emulate, but we currently
+ * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
+ */
+ default:
+ DBG("invalid wrap: %u", wrap);
+ return 0;
+ }
}
static enum a5xx_tex_filter
tex_filter(unsigned filter, bool aniso)
{
- switch (filter) {
- case PIPE_TEX_FILTER_NEAREST:
- return A5XX_TEX_NEAREST;
- case PIPE_TEX_FILTER_LINEAR:
- return aniso ? A5XX_TEX_ANISO : A5XX_TEX_LINEAR;
- default:
- DBG("invalid filter: %u", filter);
- return 0;
- }
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ return A5XX_TEX_NEAREST;
+ case PIPE_TEX_FILTER_LINEAR:
+ return aniso ? A5XX_TEX_ANISO : A5XX_TEX_LINEAR;
+ default:
+ DBG("invalid filter: %u", filter);
+ return 0;
+ }
}
static void *
fd5_sampler_state_create(struct pipe_context *pctx,
- const struct pipe_sampler_state *cso)
+ const struct pipe_sampler_state *cso)
{
- struct fd5_sampler_stateobj *so = CALLOC_STRUCT(fd5_sampler_stateobj);
- unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
- bool miplinear = false;
-
- if (!so)
- return NULL;
-
- so->base = *cso;
-
- if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
- miplinear = true;
-
- so->needs_border = false;
- so->texsamp0 =
- COND(miplinear, A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
- A5XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
- A5XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
- A5XX_TEX_SAMP_0_ANISO(aniso) |
- A5XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
- A5XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
- A5XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
-
- so->texsamp1 =
- COND(!cso->seamless_cube_map, A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
- COND(!cso->normalized_coords, A5XX_TEX_SAMP_1_UNNORM_COORDS);
-
- so->texsamp0 |= A5XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
-
- if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
- so->texsamp1 |=
- A5XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
- A5XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
- } else {
- /* If we're not doing mipmap filtering, we still need a slightly > 0
- * LOD clamp so the HW can decide between min and mag filtering of
- * level 0.
- */
- so->texsamp1 |=
- A5XX_TEX_SAMP_1_MIN_LOD(MIN2(cso->min_lod, 0.125)) |
- A5XX_TEX_SAMP_1_MAX_LOD(MIN2(cso->max_lod, 0.125));
- }
-
- if (cso->compare_mode)
- so->texsamp1 |= A5XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
-
- return so;
+ struct fd5_sampler_stateobj *so = CALLOC_STRUCT(fd5_sampler_stateobj);
+ unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
+ bool miplinear = false;
+
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
+ miplinear = true;
+
+ so->needs_border = false;
+ so->texsamp0 =
+ COND(miplinear, A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
+ A5XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
+ A5XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
+ A5XX_TEX_SAMP_0_ANISO(aniso) |
+ A5XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
+ A5XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
+ A5XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
+
+ so->texsamp1 =
+ COND(!cso->seamless_cube_map, A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
+ COND(!cso->normalized_coords, A5XX_TEX_SAMP_1_UNNORM_COORDS);
+
+ so->texsamp0 |= A5XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
+
+ if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ so->texsamp1 |= A5XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
+ A5XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
+ } else {
+ /* If we're not doing mipmap filtering, we still need a slightly > 0
+ * LOD clamp so the HW can decide between min and mag filtering of
+ * level 0.
+ */
+ so->texsamp1 |= A5XX_TEX_SAMP_1_MIN_LOD(MIN2(cso->min_lod, 0.125)) |
+ A5XX_TEX_SAMP_1_MAX_LOD(MIN2(cso->max_lod, 0.125));
+ }
+
+ if (cso->compare_mode)
+ so->texsamp1 |=
+ A5XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
+
+ return so;
}
static bool
use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format)
{
- return false; // TODO check if this is still needed on a5xx
+ return false; // TODO check if this is still needed on a5xx
}
static struct pipe_sampler_view *
fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
- const struct pipe_sampler_view *cso)
+ const struct pipe_sampler_view *cso)
{
- struct fd5_pipe_sampler_view *so = CALLOC_STRUCT(fd5_pipe_sampler_view);
- struct fd_resource *rsc = fd_resource(prsc);
- enum pipe_format format = cso->format;
- unsigned lvl, layers = 0;
-
- if (!so)
- return NULL;
-
- if (format == PIPE_FORMAT_X32_S8X24_UINT) {
- rsc = rsc->stencil;
- format = rsc->b.b.format;
- }
-
- so->base = *cso;
- pipe_reference(NULL, &prsc->reference);
- so->base.texture = prsc;
- so->base.reference.count = 1;
- so->base.context = pctx;
-
- so->texconst0 =
- A5XX_TEX_CONST_0_FMT(fd5_pipe2tex(format)) |
- A5XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
- fd5_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
- cso->swizzle_b, cso->swizzle_a);
-
- /* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle
- * we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful
- * way to re-arrange things so stencil component is where the swiz
- * expects.
- *
- * Note that gallium expects stencil sampler to return (s,s,s,s)
- * which isn't quite true. To make that happen we'd have to massage
- * the swizzle. But in practice only the .x component is used.
- */
- if (format == PIPE_FORMAT_X24S8_UINT) {
- so->texconst0 |= A5XX_TEX_CONST_0_SWAP(XYZW);
- }
-
- if (util_format_is_srgb(format)) {
- if (use_astc_srgb_workaround(pctx, format))
- so->astc_srgb = true;
- so->texconst0 |= A5XX_TEX_CONST_0_SRGB;
- }
-
- if (cso->target == PIPE_BUFFER) {
- unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
-
- lvl = 0;
- so->texconst1 =
- A5XX_TEX_CONST_1_WIDTH(elements & MASK(15)) |
- A5XX_TEX_CONST_1_HEIGHT(elements >> 15);
- so->texconst2 =
- A5XX_TEX_CONST_2_UNK4 |
- A5XX_TEX_CONST_2_UNK31;
- so->offset = cso->u.buf.offset;
- } else {
- unsigned miplevels;
-
- lvl = fd_sampler_first_level(cso);
- miplevels = fd_sampler_last_level(cso) - lvl;
- layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
-
- so->texconst0 |= A5XX_TEX_CONST_0_MIPLVLS(miplevels);
- so->texconst1 =
- A5XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
- A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
- so->texconst2 =
- A5XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 6) |
- A5XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
- so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
- }
-
- so->texconst2 |= A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target));
-
- switch (cso->target) {
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_2D:
- so->texconst3 =
- A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
- so->texconst5 =
- A5XX_TEX_CONST_5_DEPTH(1);
- break;
- case PIPE_TEXTURE_1D_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- so->texconst3 =
- A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
- so->texconst5 =
- A5XX_TEX_CONST_5_DEPTH(layers);
- break;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- so->texconst3 =
- A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
- so->texconst5 =
- A5XX_TEX_CONST_5_DEPTH(layers / 6);
- break;
- case PIPE_TEXTURE_3D:
- so->texconst3 =
- A5XX_TEX_CONST_3_MIN_LAYERSZ(
- fd_resource_slice(rsc, prsc->last_level)->size0) |
- A5XX_TEX_CONST_3_ARRAY_PITCH(fd_resource_slice(rsc, lvl)->size0);
- so->texconst5 =
- A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl));
- break;
- default:
- so->texconst3 = 0x00000000;
- break;
- }
-
- return &so->base;
+ struct fd5_pipe_sampler_view *so = CALLOC_STRUCT(fd5_pipe_sampler_view);
+ struct fd_resource *rsc = fd_resource(prsc);
+ enum pipe_format format = cso->format;
+ unsigned lvl, layers = 0;
+
+ if (!so)
+ return NULL;
+
+ if (format == PIPE_FORMAT_X32_S8X24_UINT) {
+ rsc = rsc->stencil;
+ format = rsc->b.b.format;
+ }
+
+ so->base = *cso;
+ pipe_reference(NULL, &prsc->reference);
+ so->base.texture = prsc;
+ so->base.reference.count = 1;
+ so->base.context = pctx;
+
+ so->texconst0 = A5XX_TEX_CONST_0_FMT(fd5_pipe2tex(format)) |
+ A5XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
+ fd5_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
+ cso->swizzle_b, cso->swizzle_a);
+
+ /* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle
+ * we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful
+ * way to re-arrange things so stencil component is where the swiz
+ * expects.
+ *
+ * Note that gallium expects stencil sampler to return (s,s,s,s)
+ * which isn't quite true. To make that happen we'd have to massage
+ * the swizzle. But in practice only the .x component is used.
+ */
+ if (format == PIPE_FORMAT_X24S8_UINT) {
+ so->texconst0 |= A5XX_TEX_CONST_0_SWAP(XYZW);
+ }
+
+ if (util_format_is_srgb(format)) {
+ if (use_astc_srgb_workaround(pctx, format))
+ so->astc_srgb = true;
+ so->texconst0 |= A5XX_TEX_CONST_0_SRGB;
+ }
+
+ if (cso->target == PIPE_BUFFER) {
+ unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
+
+ lvl = 0;
+ so->texconst1 = A5XX_TEX_CONST_1_WIDTH(elements & MASK(15)) |
+ A5XX_TEX_CONST_1_HEIGHT(elements >> 15);
+ so->texconst2 = A5XX_TEX_CONST_2_UNK4 | A5XX_TEX_CONST_2_UNK31;
+ so->offset = cso->u.buf.offset;
+ } else {
+ unsigned miplevels;
+
+ lvl = fd_sampler_first_level(cso);
+ miplevels = fd_sampler_last_level(cso) - lvl;
+ layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
+
+ so->texconst0 |= A5XX_TEX_CONST_0_MIPLVLS(miplevels);
+ so->texconst1 = A5XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+ A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+ so->texconst2 = A5XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 6) |
+ A5XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
+ so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
+ }
+
+ so->texconst2 |= A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target));
+
+ switch (cso->target) {
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_2D:
+ so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
+ so->texconst5 = A5XX_TEX_CONST_5_DEPTH(1);
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
+ so->texconst5 = A5XX_TEX_CONST_5_DEPTH(layers);
+ break;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
+ so->texconst5 = A5XX_TEX_CONST_5_DEPTH(layers / 6);
+ break;
+ case PIPE_TEXTURE_3D:
+ so->texconst3 =
+ A5XX_TEX_CONST_3_MIN_LAYERSZ(
+ fd_resource_slice(rsc, prsc->last_level)->size0) |
+ A5XX_TEX_CONST_3_ARRAY_PITCH(fd_resource_slice(rsc, lvl)->size0);
+ so->texconst5 = A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl));
+ break;
+ default:
+ so->texconst3 = 0x00000000;
+ break;
+ }
+
+ return &so->base;
}
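A note on the PIPE_BUFFER path above, with an invented element count:

/* The element count is split across the 15-bit WIDTH field and the
 * HEIGHT field, e.g. for 100000 elements:
 *
 *    WIDTH  = 100000 & 0x7fff = 1696
 *    HEIGHT = 100000 >> 15    = 3     (3 * 32768 + 1696 == 100000)
 */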
static void
fd5_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
- unsigned start, unsigned nr, unsigned unbind_num_trailing_slots,
- struct pipe_sampler_view **views)
+ unsigned start, unsigned nr,
+ unsigned unbind_num_trailing_slots,
+ struct pipe_sampler_view **views)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd5_context *fd5_ctx = fd5_context(ctx);
- uint16_t astc_srgb = 0;
- unsigned i;
-
- for (i = 0; i < nr; i++) {
- if (views[i]) {
- struct fd5_pipe_sampler_view *view =
- fd5_pipe_sampler_view(views[i]);
- if (view->astc_srgb)
- astc_srgb |= (1 << i);
- }
- }
-
- fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots, views);
-
- if (shader == PIPE_SHADER_FRAGMENT) {
- fd5_ctx->fastc_srgb = astc_srgb;
- } else if (shader == PIPE_SHADER_VERTEX) {
- fd5_ctx->vastc_srgb = astc_srgb;
- }
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd5_context *fd5_ctx = fd5_context(ctx);
+ uint16_t astc_srgb = 0;
+ unsigned i;
+
+ for (i = 0; i < nr; i++) {
+ if (views[i]) {
+ struct fd5_pipe_sampler_view *view = fd5_pipe_sampler_view(views[i]);
+ if (view->astc_srgb)
+ astc_srgb |= (1 << i);
+ }
+ }
+
+ fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots,
+ views);
+
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ fd5_ctx->fastc_srgb = astc_srgb;
+ } else if (shader == PIPE_SHADER_VERTEX) {
+ fd5_ctx->vastc_srgb = astc_srgb;
+ }
}
void
fd5_texture_init(struct pipe_context *pctx)
{
- pctx->create_sampler_state = fd5_sampler_state_create;
- pctx->bind_sampler_states = fd_sampler_states_bind;
- pctx->create_sampler_view = fd5_sampler_view_create;
- pctx->set_sampler_views = fd5_set_sampler_views;
+ pctx->create_sampler_state = fd5_sampler_state_create;
+ pctx->bind_sampler_states = fd_sampler_states_bind;
+ pctx->create_sampler_view = fd5_sampler_view_create;
+ pctx->set_sampler_views = fd5_set_sampler_views;
}
#include "pipe/p_context.h"
-#include "freedreno_texture.h"
#include "freedreno_resource.h"
+#include "freedreno_texture.h"
#include "fd5_context.h"
#include "fd5_format.h"
struct fd5_sampler_stateobj {
- struct pipe_sampler_state base;
- uint32_t texsamp0, texsamp1, texsamp2, texsamp3;
- bool needs_border;
+ struct pipe_sampler_state base;
+ uint32_t texsamp0, texsamp1, texsamp2, texsamp3;
+ bool needs_border;
};
static inline struct fd5_sampler_stateobj *
fd5_sampler_stateobj(struct pipe_sampler_state *samp)
{
- return (struct fd5_sampler_stateobj *)samp;
+ return (struct fd5_sampler_stateobj *)samp;
}
struct fd5_pipe_sampler_view {
- struct pipe_sampler_view base;
- uint32_t texconst0, texconst1, texconst2, texconst3, texconst5;
- uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11;
- uint32_t offset;
- bool astc_srgb;
+ struct pipe_sampler_view base;
+ uint32_t texconst0, texconst1, texconst2, texconst3, texconst5;
+ uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11;
+ uint32_t offset;
+ bool astc_srgb;
};
static inline struct fd5_pipe_sampler_view *
fd5_pipe_sampler_view(struct pipe_sampler_view *pview)
{
- return (struct fd5_pipe_sampler_view *)pview;
+ return (struct fd5_pipe_sampler_view *)pview;
}
void fd5_texture_init(struct pipe_context *pctx);
-
static inline enum a5xx_tex_type
fd5_tex_type(unsigned target)
{
- switch (target) {
- default:
- assert(0);
- case PIPE_BUFFER:
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return A5XX_TEX_1D;
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_2D_ARRAY:
- return A5XX_TEX_2D;
- case PIPE_TEXTURE_3D:
- return A5XX_TEX_3D;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return A5XX_TEX_CUBE;
- }
+ switch (target) {
+ default:
+ assert(0);
+ case PIPE_BUFFER:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return A5XX_TEX_1D;
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return A5XX_TEX_2D;
+ case PIPE_TEXTURE_3D:
+ return A5XX_TEX_3D;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return A5XX_TEX_CUBE;
+ }
}
#endif /* FD5_TEXTURE_H_ */
* Rob Clark <robclark@freedesktop.org>
*/
-
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd5_zsa.h"
#include "fd5_context.h"
#include "fd5_format.h"
+#include "fd5_zsa.h"
void *
fd5_zsa_state_create(struct pipe_context *pctx,
- const struct pipe_depth_stencil_alpha_state *cso)
+ const struct pipe_depth_stencil_alpha_state *cso)
{
- struct fd5_zsa_stateobj *so;
-
- so = CALLOC_STRUCT(fd5_zsa_stateobj);
- if (!so)
- return NULL;
-
- so->base = *cso;
-
- switch (cso->depth_func) {
- case PIPE_FUNC_LESS:
- case PIPE_FUNC_LEQUAL:
- so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE;
- break;
-
- case PIPE_FUNC_GREATER:
- case PIPE_FUNC_GEQUAL:
- so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE | A5XX_GRAS_LRZ_CNTL_GREATER;
- break;
-
- default:
- /* LRZ not enabled */
- so->gras_lrz_cntl = 0;
- break;
- }
-
- if (!(cso->stencil->enabled || cso->alpha_enabled || !cso->depth_writemask))
- so->lrz_write = true;
-
- so->rb_depth_cntl |=
- A5XX_RB_DEPTH_CNTL_ZFUNC(cso->depth_func); /* maps 1:1 */
-
- if (cso->depth_enabled)
- so->rb_depth_cntl |=
- A5XX_RB_DEPTH_CNTL_Z_ENABLE |
- A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
-
- if (cso->depth_writemask)
- so->rb_depth_cntl |= A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
-
- if (cso->stencil[0].enabled) {
- const struct pipe_stencil_state *s = &cso->stencil[0];
-
- so->rb_stencil_control |=
- A5XX_RB_STENCIL_CONTROL_STENCIL_READ |
- A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
- A5XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
- A5XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
- A5XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
- A5XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
- so->rb_stencilrefmask |=
- A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
- A5XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
-
- if (cso->stencil[1].enabled) {
- const struct pipe_stencil_state *bs = &cso->stencil[1];
-
- so->rb_stencil_control |=
- A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
- A5XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
- A5XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
- A5XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
- A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
- so->rb_stencilrefmask_bf |=
- A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
- A5XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
- }
- }
-
- if (cso->alpha_enabled) {
- uint32_t ref = cso->alpha_ref_value * 255.0;
- so->rb_alpha_control =
- A5XX_RB_ALPHA_CONTROL_ALPHA_TEST |
- A5XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
- A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
-// so->rb_depth_control |=
-// A5XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
- }
-
- return so;
+ struct fd5_zsa_stateobj *so;
+
+ so = CALLOC_STRUCT(fd5_zsa_stateobj);
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ switch (cso->depth_func) {
+ case PIPE_FUNC_LESS:
+ case PIPE_FUNC_LEQUAL:
+ so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE;
+ break;
+
+ case PIPE_FUNC_GREATER:
+ case PIPE_FUNC_GEQUAL:
+ so->gras_lrz_cntl =
+ A5XX_GRAS_LRZ_CNTL_ENABLE | A5XX_GRAS_LRZ_CNTL_GREATER;
+ break;
+
+ default:
+ /* LRZ not enabled */
+ so->gras_lrz_cntl = 0;
+ break;
+ }
+
+ if (!(cso->stencil->enabled || cso->alpha_enabled || !cso->depth_writemask))
+ so->lrz_write = true;
+
+ so->rb_depth_cntl |=
+ A5XX_RB_DEPTH_CNTL_ZFUNC(cso->depth_func); /* maps 1:1 */
+
+ if (cso->depth_enabled)
+ so->rb_depth_cntl |=
+ A5XX_RB_DEPTH_CNTL_Z_ENABLE | A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
+
+ if (cso->depth_writemask)
+ so->rb_depth_cntl |= A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
+
+ if (cso->stencil[0].enabled) {
+ const struct pipe_stencil_state *s = &cso->stencil[0];
+
+ so->rb_stencil_control |=
+ A5XX_RB_STENCIL_CONTROL_STENCIL_READ |
+ A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+ A5XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
+ A5XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
+ A5XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
+ A5XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
+ so->rb_stencilrefmask |=
+ A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
+ A5XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
+
+ if (cso->stencil[1].enabled) {
+ const struct pipe_stencil_state *bs = &cso->stencil[1];
+
+ so->rb_stencil_control |=
+ A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+ A5XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
+ A5XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
+ A5XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
+ A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
+ so->rb_stencilrefmask_bf |=
+ A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
+ A5XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
+ }
+ }
+
+ if (cso->alpha_enabled) {
+ uint32_t ref = cso->alpha_ref_value * 255.0;
+ so->rb_alpha_control =
+ A5XX_RB_ALPHA_CONTROL_ALPHA_TEST |
+ A5XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
+ A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
+ // so->rb_depth_control |=
+ // A5XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+ }
+
+ return so;
}
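The lrz_write condition above is a De Morgan-compressed form; expanded, it reads more directly. A sketch of the equivalent predicate (hypothetical helper, not in the tree):

#include <stdbool.h>

/* Equivalent expansion of: !(stencil->enabled || alpha_enabled ||
 * !depth_writemask) -- LRZ may be written only when depth writes are
 * enabled and neither stencil nor alpha test can discard fragments
 * after they pass the depth test.
 */
static bool
can_write_lrz(bool stencil_enabled, bool alpha_enabled, bool depth_writemask)
{
   return !stencil_enabled && !alpha_enabled && depth_writemask;
}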
#ifndef FD5_ZSA_H_
#define FD5_ZSA_H_
-
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "freedreno_util.h"
struct fd5_zsa_stateobj {
- struct pipe_depth_stencil_alpha_state base;
-
- uint32_t rb_alpha_control;
- uint32_t rb_depth_cntl;
- uint32_t rb_stencil_control;
- uint32_t rb_stencilrefmask;
- uint32_t rb_stencilrefmask_bf;
- uint32_t gras_lrz_cntl;
- bool lrz_write;
+ struct pipe_depth_stencil_alpha_state base;
+
+ uint32_t rb_alpha_control;
+ uint32_t rb_depth_cntl;
+ uint32_t rb_stencil_control;
+ uint32_t rb_stencilrefmask;
+ uint32_t rb_stencilrefmask_bf;
+ uint32_t gras_lrz_cntl;
+ bool lrz_write;
};
static inline struct fd5_zsa_stateobj *
fd5_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
- return (struct fd5_zsa_stateobj *)zsa;
+ return (struct fd5_zsa_stateobj *)zsa;
}
-void * fd5_zsa_state_create(struct pipe_context *pctx,
- const struct pipe_depth_stencil_alpha_state *cso);
+void *fd5_zsa_state_create(struct pipe_context *pctx,
+ const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD5_ZSA_H_ */
#include "pipe/p_state.h"
#include "util/u_blend.h"
#include "util/u_dual_blend.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
#include "fd6_blend.h"
#include "fd6_context.h"
static enum a3xx_rb_blend_opcode
blend_func(unsigned func)
{
- switch (func) {
- case PIPE_BLEND_ADD:
- return BLEND_DST_PLUS_SRC;
- case PIPE_BLEND_MIN:
- return BLEND_MIN_DST_SRC;
- case PIPE_BLEND_MAX:
- return BLEND_MAX_DST_SRC;
- case PIPE_BLEND_SUBTRACT:
- return BLEND_SRC_MINUS_DST;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return BLEND_DST_MINUS_SRC;
- default:
- DBG("invalid blend func: %x", func);
- return 0;
- }
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return BLEND_DST_PLUS_SRC;
+ case PIPE_BLEND_MIN:
+ return BLEND_MIN_DST_SRC;
+ case PIPE_BLEND_MAX:
+ return BLEND_MAX_DST_SRC;
+ case PIPE_BLEND_SUBTRACT:
+ return BLEND_SRC_MINUS_DST;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return BLEND_DST_MINUS_SRC;
+ default:
+ DBG("invalid blend func: %x", func);
+ return 0;
+ }
}
struct fd6_blend_variant *
-__fd6_setup_blend_variant(struct fd6_blend_stateobj *blend, unsigned sample_mask)
+__fd6_setup_blend_variant(struct fd6_blend_stateobj *blend,
+ unsigned sample_mask)
{
- const struct pipe_blend_state *cso = &blend->base;
- struct fd6_blend_variant *so;
- enum a3xx_rop_code rop = ROP_COPY;
- bool reads_dest = false;
- unsigned mrt_blend = 0;
-
- if (cso->logicop_enable) {
- rop = cso->logicop_func; /* maps 1:1 */
- reads_dest = util_logicop_reads_dest(cso->logicop_func);
- }
-
- so = rzalloc_size(blend, sizeof(*so));
- if (!so)
- return NULL;
-
- struct fd_ringbuffer *ring = fd_ringbuffer_new_object(blend->ctx->pipe,
- ((A6XX_MAX_RENDER_TARGETS * 4) + 6) * 4);
- so->stateobj = ring;
-
- for (unsigned i = 0; i <= cso->max_rt; i++) {
- const struct pipe_rt_blend_state *rt;
-
- if (cso->independent_blend_enable)
- rt = &cso->rt[i];
- else
- rt = &cso->rt[0];
-
- OUT_REG(ring, A6XX_RB_MRT_BLEND_CONTROL(i,
- .rgb_src_factor = fd_blend_factor(rt->rgb_src_factor),
- .rgb_blend_opcode = blend_func(rt->rgb_func),
- .rgb_dest_factor = fd_blend_factor(rt->rgb_dst_factor),
- .alpha_src_factor = fd_blend_factor(rt->alpha_src_factor),
- .alpha_blend_opcode = blend_func(rt->alpha_func),
- .alpha_dest_factor = fd_blend_factor(rt->alpha_dst_factor),
- ));
-
- OUT_REG(ring, A6XX_RB_MRT_CONTROL(i,
- .rop_code = rop,
- .rop_enable = cso->logicop_enable,
- .component_enable = rt->colormask,
- .blend = rt->blend_enable,
- .blend2 = rt->blend_enable,
- ));
-
- if (rt->blend_enable) {
- mrt_blend |= (1 << i);
- }
-
- if (reads_dest) {
- mrt_blend |= (1 << i);
- }
- }
-
- OUT_REG(ring, A6XX_RB_DITHER_CNTL(
- .dither_mode_mrt0 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
- .dither_mode_mrt1 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
- .dither_mode_mrt2 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
- .dither_mode_mrt3 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
- .dither_mode_mrt4 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
- .dither_mode_mrt5 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
- .dither_mode_mrt6 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
- .dither_mode_mrt7 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
- ));
-
- OUT_REG(ring, A6XX_SP_BLEND_CNTL(
- .unk8 = true,
- .alpha_to_coverage = cso->alpha_to_coverage,
- .enabled = !!mrt_blend,
- .dual_color_in_enable = blend->use_dual_src_blend,
- ));
-
- OUT_REG(ring, A6XX_RB_BLEND_CNTL(
- .enable_blend = mrt_blend,
- .alpha_to_coverage = cso->alpha_to_coverage,
- .alpha_to_one = cso->alpha_to_one,
- .independent_blend = cso->independent_blend_enable,
- .sample_mask = sample_mask,
- .dual_color_in_enable = blend->use_dual_src_blend,
- ));
-
- so->sample_mask = sample_mask;
-
- util_dynarray_append(&blend->variants, struct fd6_blend_variant *, so);
-
- return so;
+ const struct pipe_blend_state *cso = &blend->base;
+ struct fd6_blend_variant *so;
+ enum a3xx_rop_code rop = ROP_COPY;
+ bool reads_dest = false;
+ unsigned mrt_blend = 0;
+
+ if (cso->logicop_enable) {
+ rop = cso->logicop_func; /* maps 1:1 */
+ reads_dest = util_logicop_reads_dest(cso->logicop_func);
+ }
+
+ so = rzalloc_size(blend, sizeof(*so));
+ if (!so)
+ return NULL;
+
+ struct fd_ringbuffer *ring = fd_ringbuffer_new_object(
+ blend->ctx->pipe, ((A6XX_MAX_RENDER_TARGETS * 4) + 6) * 4);
+ so->stateobj = ring;
+
+ for (unsigned i = 0; i <= cso->max_rt; i++) {
+ const struct pipe_rt_blend_state *rt;
+
+ if (cso->independent_blend_enable)
+ rt = &cso->rt[i];
+ else
+ rt = &cso->rt[0];
+
+ OUT_REG(ring,
+ A6XX_RB_MRT_BLEND_CONTROL(
+ i, .rgb_src_factor = fd_blend_factor(rt->rgb_src_factor),
+ .rgb_blend_opcode = blend_func(rt->rgb_func),
+ .rgb_dest_factor = fd_blend_factor(rt->rgb_dst_factor),
+ .alpha_src_factor = fd_blend_factor(rt->alpha_src_factor),
+ .alpha_blend_opcode = blend_func(rt->alpha_func),
+ .alpha_dest_factor = fd_blend_factor(rt->alpha_dst_factor), ));
+
+ OUT_REG(ring, A6XX_RB_MRT_CONTROL(i, .rop_code = rop,
+ .rop_enable = cso->logicop_enable,
+ .component_enable = rt->colormask,
+ .blend = rt->blend_enable,
+ .blend2 = rt->blend_enable, ));
+
+ if (rt->blend_enable) {
+ mrt_blend |= (1 << i);
+ }
+
+ if (reads_dest) {
+ mrt_blend |= (1 << i);
+ }
+ }
+
+ OUT_REG(
+ ring,
+ A6XX_RB_DITHER_CNTL(
+ .dither_mode_mrt0 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
+ .dither_mode_mrt1 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
+ .dither_mode_mrt2 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
+ .dither_mode_mrt3 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
+ .dither_mode_mrt4 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
+ .dither_mode_mrt5 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
+ .dither_mode_mrt6 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
+ .dither_mode_mrt7 =
+ cso->dither ? DITHER_ALWAYS : DITHER_DISABLE, ));
+
+ OUT_REG(ring, A6XX_SP_BLEND_CNTL(.unk8 = true,
+ .alpha_to_coverage = cso->alpha_to_coverage,
+ .enabled = !!mrt_blend,
+ .dual_color_in_enable =
+ blend->use_dual_src_blend, ));
+
+ OUT_REG(
+ ring,
+ A6XX_RB_BLEND_CNTL(.enable_blend = mrt_blend,
+ .alpha_to_coverage = cso->alpha_to_coverage,
+ .alpha_to_one = cso->alpha_to_one,
+ .independent_blend = cso->independent_blend_enable,
+ .sample_mask = sample_mask,
+ .dual_color_in_enable = blend->use_dual_src_blend, ));
+
+ so->sample_mask = sample_mask;
+
+ util_dynarray_append(&blend->variants, struct fd6_blend_variant *, so);
+
+ return so;
}
void *
fd6_blend_state_create(struct pipe_context *pctx,
- const struct pipe_blend_state *cso)
+ const struct pipe_blend_state *cso)
{
- struct fd6_blend_stateobj *so;
-
- so = rzalloc_size(NULL, sizeof(*so));
- if (!so)
- return NULL;
-
- so->base = *cso;
- so->ctx = fd_context(pctx);
-
- if (cso->logicop_enable) {
- so->reads_dest |= util_logicop_reads_dest(cso->logicop_func);
- }
-
- so->use_dual_src_blend =
- cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0);
-
- unsigned nr = cso->independent_blend_enable ? cso->max_rt : 0;
- for (unsigned i = 0; i <= nr; i++) {
- const struct pipe_rt_blend_state *rt = &cso->rt[i];
-
- so->reads_dest |= rt->blend_enable;
-
- /* From the PoV of LRZ, having masked color channels is
- * the same as having blend enabled, in that the draw will
- * care about the fragments from an earlier draw.
- *
- * NOTE we actually don't care about masked color channels
- * that don't actually exist in the render target, but we
- * don't know the render target format here to determine
- * that. It is probably not worth worrying about, but if
- * we find a game/benchmark that goes out of its way to
- * mask off non-existent channels, we should fix up the
- * pipe_blend_state to give us more info.
- */
- if (rt->blend_enable || (rt->colormask != 0xf)) {
- so->reads_dest = true;
- }
- }
-
- util_dynarray_init(&so->variants, so);
-
- return so;
+ struct fd6_blend_stateobj *so;
+
+ so = rzalloc_size(NULL, sizeof(*so));
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+ so->ctx = fd_context(pctx);
+
+ if (cso->logicop_enable) {
+ so->reads_dest |= util_logicop_reads_dest(cso->logicop_func);
+ }
+
+ so->use_dual_src_blend =
+ cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0);
+
+ unsigned nr = cso->independent_blend_enable ? cso->max_rt : 0;
+ for (unsigned i = 0; i <= nr; i++) {
+ const struct pipe_rt_blend_state *rt = &cso->rt[i];
+
+ so->reads_dest |= rt->blend_enable;
+
+ /* From the PoV of LRZ, having masked color channels is
+ * the same as having blend enabled, in that the draw will
+ * care about the fragments from an earlier draw.
+ *
+ * NOTE we actually don't care about masked color channels
+ * that don't actually exist in the render target, but we
+ * don't know the render target format here to determine
+ * that. It is probably not worth worrying about, but if
+ * we find a game/benchmark that goes out of its way to
+ * mask off non-existent channels, we should fix up the
+ * pipe_blend_state to give us more info.
+ */
+ if (rt->blend_enable || (rt->colormask != 0xf)) {
+ so->reads_dest = true;
+ }
+ }
+
+ util_dynarray_init(&so->variants, so);
+
+ return so;
}
void
fd6_blend_state_delete(struct pipe_context *pctx, void *hwcso)
{
- struct fd6_blend_stateobj *so = hwcso;
+ struct fd6_blend_stateobj *so = hwcso;
- util_dynarray_foreach(&so->variants, struct fd6_blend_variant *, vp) {
- struct fd6_blend_variant *v = *vp;
- fd_ringbuffer_del(v->stateobj);
- }
+ util_dynarray_foreach (&so->variants, struct fd6_blend_variant *, vp) {
+ struct fd6_blend_variant *v = *vp;
+ fd_ringbuffer_del(v->stateobj);
+ }
- ralloc_free(so);
+ ralloc_free(so);
}
#ifndef FD6_BLEND_H_
#define FD6_BLEND_H_
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "freedreno_context.h"
#include "freedreno_util.h"
* to change frequently.
*/
struct fd6_blend_variant {
- unsigned sample_mask;
- struct fd_ringbuffer *stateobj;
+ unsigned sample_mask;
+ struct fd_ringbuffer *stateobj;
};
struct fd6_blend_stateobj {
- struct pipe_blend_state base;
+ struct pipe_blend_state base;
- bool use_dual_src_blend;
+ bool use_dual_src_blend;
- struct fd_context *ctx;
- bool reads_dest;
- struct util_dynarray variants;
+ struct fd_context *ctx;
+ bool reads_dest;
+ struct util_dynarray variants;
};
static inline struct fd6_blend_stateobj *
fd6_blend_stateobj(struct pipe_blend_state *blend)
{
- return (struct fd6_blend_stateobj *)blend;
+ return (struct fd6_blend_stateobj *)blend;
}
-struct fd6_blend_variant * __fd6_setup_blend_variant(
- struct fd6_blend_stateobj *blend, unsigned sample_mask);
+struct fd6_blend_variant *
+__fd6_setup_blend_variant(struct fd6_blend_stateobj *blend,
+ unsigned sample_mask);
static inline struct fd6_blend_variant *
-fd6_blend_variant(struct pipe_blend_state *cso,
- unsigned nr_samples, unsigned sample_mask)
+fd6_blend_variant(struct pipe_blend_state *cso, unsigned nr_samples,
+ unsigned sample_mask)
{
- struct fd6_blend_stateobj *blend = fd6_blend_stateobj(cso);
- unsigned mask = BITFIELD_MASK(nr_samples);
+ struct fd6_blend_stateobj *blend = fd6_blend_stateobj(cso);
+ unsigned mask = BITFIELD_MASK(nr_samples);
- util_dynarray_foreach(&blend->variants, struct fd6_blend_variant *, vp) {
- struct fd6_blend_variant *v = *vp;
+ util_dynarray_foreach (&blend->variants, struct fd6_blend_variant *, vp) {
+ struct fd6_blend_variant *v = *vp;
- /* mask out sample-mask bits that we don't care about to avoid
- * creating unnecessary variants
- */
- if ((mask & v->sample_mask) == (mask & sample_mask)) {
- return v;
- }
- }
+ /* mask out sample-mask bits that we don't care about to avoid
+ * creating unnecessary variants
+ */
+ if ((mask & v->sample_mask) == (mask & sample_mask)) {
+ return v;
+ }
+ }
- return __fd6_setup_blend_variant(blend, sample_mask);
+ return __fd6_setup_blend_variant(blend, sample_mask);
}
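The variant lookup in fd6_blend_variant() above compares sample masks only on the bits the given sample count can use, so e.g. with nr_samples = 4 (mask 0xf) the masks 0x1f and 0x0f select the same variant. A standalone sketch of just that predicate, assuming nothing beyond the C standard library:

#include <stdbool.h>

/* Two sample masks pick the same blend variant if they agree on the
 * bits covered by the sample count (BITFIELD_MASK(nr_samples)).
 */
static bool
sample_masks_match(unsigned nr_samples, unsigned a, unsigned b)
{
   unsigned mask = (1u << nr_samples) - 1; /* BITFIELD_MASK(nr_samples) */
   return (mask & a) == (mask & b);
}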
-void * fd6_blend_state_create(struct pipe_context *pctx,
- const struct pipe_blend_state *cso);
+void *fd6_blend_state_create(struct pipe_context *pctx,
+ const struct pipe_blend_state *cso);
void fd6_blend_state_delete(struct pipe_context *, void *hwcso);
#endif /* FD6_BLEND_H_ */
* Rob Clark <robclark@freedesktop.org>
*/
-#include "util/u_dump.h"
-#include "util/half_float.h"
#include "util/format_srgb.h"
+#include "util/half_float.h"
+#include "util/u_dump.h"
#include "freedreno_blitter.h"
#include "freedreno_fence.h"
#include "freedreno_tracepoints.h"
#include "fd6_blitter.h"
-#include "fd6_format.h"
#include "fd6_emit.h"
+#include "fd6_format.h"
#include "fd6_resource.h"
static inline enum a6xx_2d_ifmt
fd6_ifmt(enum a6xx_format fmt)
{
- switch (fmt) {
- case FMT6_A8_UNORM:
- case FMT6_8_UNORM:
- case FMT6_8_SNORM:
- case FMT6_8_8_UNORM:
- case FMT6_8_8_SNORM:
- case FMT6_8_8_8_8_UNORM:
- case FMT6_8_8_8_X8_UNORM:
- case FMT6_8_8_8_8_SNORM:
- case FMT6_4_4_4_4_UNORM:
- case FMT6_5_5_5_1_UNORM:
- case FMT6_5_6_5_UNORM:
- return R2D_UNORM8;
-
- case FMT6_32_UINT:
- case FMT6_32_SINT:
- case FMT6_32_32_UINT:
- case FMT6_32_32_SINT:
- case FMT6_32_32_32_32_UINT:
- case FMT6_32_32_32_32_SINT:
- return R2D_INT32;
-
- case FMT6_16_UINT:
- case FMT6_16_SINT:
- case FMT6_16_16_UINT:
- case FMT6_16_16_SINT:
- case FMT6_16_16_16_16_UINT:
- case FMT6_16_16_16_16_SINT:
- case FMT6_10_10_10_2_UINT:
- return R2D_INT16;
-
- case FMT6_8_UINT:
- case FMT6_8_SINT:
- case FMT6_8_8_UINT:
- case FMT6_8_8_SINT:
- case FMT6_8_8_8_8_UINT:
- case FMT6_8_8_8_8_SINT:
- case FMT6_Z24_UNORM_S8_UINT:
- case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
- return R2D_INT8;
-
- case FMT6_16_UNORM:
- case FMT6_16_SNORM:
- case FMT6_16_16_UNORM:
- case FMT6_16_16_SNORM:
- case FMT6_16_16_16_16_UNORM:
- case FMT6_16_16_16_16_SNORM:
- case FMT6_32_FLOAT:
- case FMT6_32_32_FLOAT:
- case FMT6_32_32_32_32_FLOAT:
- return R2D_FLOAT32;
-
- case FMT6_16_FLOAT:
- case FMT6_16_16_FLOAT:
- case FMT6_16_16_16_16_FLOAT:
- case FMT6_11_11_10_FLOAT:
- case FMT6_10_10_10_2_UNORM_DEST:
- return R2D_FLOAT16;
-
- default:
- unreachable("bad format");
- return 0;
- }
+ switch (fmt) {
+ case FMT6_A8_UNORM:
+ case FMT6_8_UNORM:
+ case FMT6_8_SNORM:
+ case FMT6_8_8_UNORM:
+ case FMT6_8_8_SNORM:
+ case FMT6_8_8_8_8_UNORM:
+ case FMT6_8_8_8_X8_UNORM:
+ case FMT6_8_8_8_8_SNORM:
+ case FMT6_4_4_4_4_UNORM:
+ case FMT6_5_5_5_1_UNORM:
+ case FMT6_5_6_5_UNORM:
+ return R2D_UNORM8;
+
+ case FMT6_32_UINT:
+ case FMT6_32_SINT:
+ case FMT6_32_32_UINT:
+ case FMT6_32_32_SINT:
+ case FMT6_32_32_32_32_UINT:
+ case FMT6_32_32_32_32_SINT:
+ return R2D_INT32;
+
+ case FMT6_16_UINT:
+ case FMT6_16_SINT:
+ case FMT6_16_16_UINT:
+ case FMT6_16_16_SINT:
+ case FMT6_16_16_16_16_UINT:
+ case FMT6_16_16_16_16_SINT:
+ case FMT6_10_10_10_2_UINT:
+ return R2D_INT16;
+
+ case FMT6_8_UINT:
+ case FMT6_8_SINT:
+ case FMT6_8_8_UINT:
+ case FMT6_8_8_SINT:
+ case FMT6_8_8_8_8_UINT:
+ case FMT6_8_8_8_8_SINT:
+ case FMT6_Z24_UNORM_S8_UINT:
+ case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
+ return R2D_INT8;
+
+ case FMT6_16_UNORM:
+ case FMT6_16_SNORM:
+ case FMT6_16_16_UNORM:
+ case FMT6_16_16_SNORM:
+ case FMT6_16_16_16_16_UNORM:
+ case FMT6_16_16_16_16_SNORM:
+ case FMT6_32_FLOAT:
+ case FMT6_32_32_FLOAT:
+ case FMT6_32_32_32_32_FLOAT:
+ return R2D_FLOAT32;
+
+ case FMT6_16_FLOAT:
+ case FMT6_16_16_FLOAT:
+ case FMT6_16_16_16_16_FLOAT:
+ case FMT6_11_11_10_FLOAT:
+ case FMT6_10_10_10_2_UNORM_DEST:
+ return R2D_FLOAT16;
+
+ default:
+ unreachable("bad format");
+ return 0;
+ }
}
/* Make sure none of the requested dimensions extend beyond the size of the
 * resource at the given miplevel:
 */
static bool
ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
{
- int last_layer =
- r->target == PIPE_TEXTURE_3D ? u_minify(r->depth0, lvl)
- : r->array_size;
+ int last_layer =
+ r->target == PIPE_TEXTURE_3D ? u_minify(r->depth0, lvl) : r->array_size;
- return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
- (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
- (b->z >= 0) && (b->z + b->depth <= last_layer);
+ return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
+ (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
+ (b->z >= 0) && (b->z + b->depth <= last_layer);
}
static bool
ok_format(enum pipe_format pfmt)
{
- enum a6xx_format fmt = fd6_pipe2color(pfmt);
-
- if (util_format_is_compressed(pfmt))
- return true;
-
- switch (pfmt) {
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z16_UNORM:
- case PIPE_FORMAT_Z32_UNORM:
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- case PIPE_FORMAT_S8_UINT:
- return true;
- default:
- break;
- }
-
- if (fmt == FMT6_NONE)
- return false;
-
- return true;
+ enum a6xx_format fmt = fd6_pipe2color(pfmt);
+
+ if (util_format_is_compressed(pfmt))
+ return true;
+
+ switch (pfmt) {
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_Z32_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case PIPE_FORMAT_S8_UINT:
+ return true;
+ default:
+ break;
+ }
+
+ if (fmt == FMT6_NONE)
+ return false;
+
+ return true;
}
-#define DEBUG_BLIT 0
+#define DEBUG_BLIT 0
#define DEBUG_BLIT_FALLBACK 0
-#define fail_if(cond) \
- do { \
- if (cond) { \
- if (DEBUG_BLIT_FALLBACK) { \
- fprintf(stderr, "falling back: %s for blit:\n", #cond); \
- dump_blit_info(info); \
- } \
- return false; \
- } \
- } while (0)
+#define fail_if(cond) \
+ do { \
+ if (cond) { \
+ if (DEBUG_BLIT_FALLBACK) { \
+ fprintf(stderr, "falling back: %s for blit:\n", #cond); \
+ dump_blit_info(info); \
+ } \
+ return false; \
+ } \
+ } while (0)
static bool
is_ubwc(struct pipe_resource *prsc, unsigned level)
{
- return fd_resource_ubwc_enabled(fd_resource(prsc), level);
+ return fd_resource_ubwc_enabled(fd_resource(prsc), level);
}
static void
dump_blit_info(const struct pipe_blit_info *info)
{
- util_dump_blit_info(stderr, info);
- fprintf(stderr, "\ndst resource: ");
- util_dump_resource(stderr, info->dst.resource);
- if (is_ubwc(info->dst.resource, info->dst.level))
- fprintf(stderr, " (ubwc)");
- fprintf(stderr, "\nsrc resource: ");
- util_dump_resource(stderr, info->src.resource);
- if (is_ubwc(info->src.resource, info->src.level))
- fprintf(stderr, " (ubwc)");
- fprintf(stderr, "\n");
+ util_dump_blit_info(stderr, info);
+ fprintf(stderr, "\ndst resource: ");
+ util_dump_resource(stderr, info->dst.resource);
+ if (is_ubwc(info->dst.resource, info->dst.level))
+ fprintf(stderr, " (ubwc)");
+ fprintf(stderr, "\nsrc resource: ");
+ util_dump_resource(stderr, info->src.resource);
+ if (is_ubwc(info->src.resource, info->src.level))
+ fprintf(stderr, " (ubwc)");
+ fprintf(stderr, "\n");
}
static bool
can_do_blit(const struct pipe_blit_info *info)
{
- /* I think we can do scaling, but not in z dimension since that would
- * require blending..
- */
- fail_if(info->dst.box.depth != info->src.box.depth);
+ /* I think we can do scaling, but not in z dimension since that would
+ * require blending..
+ */
+ fail_if(info->dst.box.depth != info->src.box.depth);
- /* Fail if unsupported format: */
- fail_if(!ok_format(info->src.format));
- fail_if(!ok_format(info->dst.format));
+ /* Fail if unsupported format: */
+ fail_if(!ok_format(info->src.format));
+ fail_if(!ok_format(info->dst.format));
- debug_assert(!util_format_is_compressed(info->src.format));
- debug_assert(!util_format_is_compressed(info->dst.format));
+ debug_assert(!util_format_is_compressed(info->src.format));
+ debug_assert(!util_format_is_compressed(info->dst.format));
- fail_if(!ok_dims(info->src.resource, &info->src.box, info->src.level));
+ fail_if(!ok_dims(info->src.resource, &info->src.box, info->src.level));
- fail_if(!ok_dims(info->dst.resource, &info->dst.box, info->dst.level));
+ fail_if(!ok_dims(info->dst.resource, &info->dst.box, info->dst.level));
- debug_assert(info->dst.box.width >= 0);
- debug_assert(info->dst.box.height >= 0);
- debug_assert(info->dst.box.depth >= 0);
+ debug_assert(info->dst.box.width >= 0);
+ debug_assert(info->dst.box.height >= 0);
+ debug_assert(info->dst.box.depth >= 0);
- fail_if(info->dst.resource->nr_samples > 1);
+ fail_if(info->dst.resource->nr_samples > 1);
- fail_if(info->window_rectangle_include);
+ fail_if(info->window_rectangle_include);
- const struct util_format_description *src_desc =
- util_format_description(info->src.format);
- const struct util_format_description *dst_desc =
- util_format_description(info->dst.format);
- const int common_channels = MIN2(src_desc->nr_channels, dst_desc->nr_channels);
+ const struct util_format_description *src_desc =
+ util_format_description(info->src.format);
+ const struct util_format_description *dst_desc =
+ util_format_description(info->dst.format);
+ const int common_channels =
+ MIN2(src_desc->nr_channels, dst_desc->nr_channels);
- if (info->mask & PIPE_MASK_RGBA) {
- for (int i = 0; i < common_channels; i++) {
- fail_if(memcmp(&src_desc->channel[i],
- &dst_desc->channel[i],
- sizeof(src_desc->channel[0])));
- }
- }
+ if (info->mask & PIPE_MASK_RGBA) {
+ for (int i = 0; i < common_channels; i++) {
+ fail_if(memcmp(&src_desc->channel[i], &dst_desc->channel[i],
+ sizeof(src_desc->channel[0])));
+ }
+ }
- fail_if(info->alpha_blend);
+ fail_if(info->alpha_blend);
- return true;
+ return true;
}
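The per-channel memcmp in can_do_blit() means the 2D engine may drop or add trailing channels, but every channel common to src and dst must have an identical layout: R8G8B8A8_UNORM -> R8G8_UNORM passes (the two shared channels match), while R8G8B8A8_UNORM -> R16G16_FLOAT does not. A standalone restatement of that check, assuming only Mesa's util_format_description() (util/format/u_format.h):

#include <stdbool.h>
#include <string.h>

#include "util/format/u_format.h"

/* A format pair is blittable (channel-wise) when every channel present
 * in both formats has an identical description.
 */
static bool
channels_compatible(enum pipe_format src, enum pipe_format dst)
{
   const struct util_format_description *s = util_format_description(src);
   const struct util_format_description *d = util_format_description(dst);
   int n = s->nr_channels < d->nr_channels ? s->nr_channels : d->nr_channels;

   for (int i = 0; i < n; i++) {
      if (memcmp(&s->channel[i], &d->channel[i], sizeof(s->channel[0])))
         return false;
   }
   return true;
}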
static void
emit_setup(struct fd_batch *batch)
{
- struct fd_ringbuffer *ring = batch->draw;
- struct fd_screen *screen = batch->ctx->screen;
-
- fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
- fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
- fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
- fd6_event_write(batch, ring, PC_CCU_INVALIDATE_DEPTH, false);
-
- /* normal BLIT_OP_SCALE operation needs RB_CCU_CNTL in bypass mode */
- OUT_WFI5(ring);
- OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
- OUT_RING(ring, A6XX_RB_CCU_CNTL_OFFSET(screen->info.a6xx.ccu_offset_bypass));
+ struct fd_ringbuffer *ring = batch->draw;
+ struct fd_screen *screen = batch->ctx->screen;
+
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
+ fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
+ fd6_event_write(batch, ring, PC_CCU_INVALIDATE_DEPTH, false);
+
+ /* normal BLIT_OP_SCALE operation needs RB_CCU_CNTL in bypass mode */
+ OUT_WFI5(ring);
+ OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
+ OUT_RING(ring, A6XX_RB_CCU_CNTL_OFFSET(screen->info.a6xx.ccu_offset_bypass));
}
static void
-emit_blit_setup(struct fd_ringbuffer *ring,
- enum pipe_format pfmt, bool scissor_enable, union pipe_color_union *color)
+emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
+ bool scissor_enable, union pipe_color_union *color)
{
- enum a6xx_format fmt = fd6_pipe2color(pfmt);
- bool is_srgb = util_format_is_srgb(pfmt);
- enum a6xx_2d_ifmt ifmt = fd6_ifmt(fmt);
-
- if (is_srgb) {
- assert(ifmt == R2D_UNORM8);
- ifmt = R2D_UNORM8_SRGB;
- }
-
- uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
- A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) |
- A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt) |
- COND(color, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
- COND(scissor_enable, A6XX_RB_2D_BLIT_CNTL_SCISSOR);
-
- OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
- OUT_RING(ring, blit_cntl);
-
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
- OUT_RING(ring, blit_cntl);
-
- if (fmt == FMT6_10_10_10_2_UNORM_DEST)
- fmt = FMT6_16_16_16_16_FLOAT;
-
- /* This register is probably badly named... it seems that it's
- * controlling the internal/accumulator format or something like
- * that. It's certainly not tied to only the src format.
- */
- OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1);
- OUT_RING(ring, A6XX_SP_2D_DST_FORMAT_COLOR_FORMAT(fmt) |
- COND(util_format_is_pure_sint(pfmt),
- A6XX_SP_2D_DST_FORMAT_SINT) |
- COND(util_format_is_pure_uint(pfmt),
- A6XX_SP_2D_DST_FORMAT_UINT) |
- COND(util_format_is_snorm(pfmt),
- A6XX_SP_2D_DST_FORMAT_SINT |
- A6XX_SP_2D_DST_FORMAT_NORM) |
- COND(util_format_is_unorm(pfmt),
-// TODO sometimes blob uses UINT+NORM but dEQP seems unhappy about that
-// A6XX_SP_2D_DST_FORMAT_UINT |
- A6XX_SP_2D_DST_FORMAT_NORM) |
- COND(is_srgb, A6XX_SP_2D_DST_FORMAT_SRGB) |
- A6XX_SP_2D_DST_FORMAT_MASK(0xf));
-
- OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
- OUT_RING(ring, 0);
+ enum a6xx_format fmt = fd6_pipe2color(pfmt);
+ bool is_srgb = util_format_is_srgb(pfmt);
+ enum a6xx_2d_ifmt ifmt = fd6_ifmt(fmt);
+
+ if (is_srgb) {
+ assert(ifmt == R2D_UNORM8);
+ ifmt = R2D_UNORM8_SRGB;
+ }
+
+ uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
+ A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) |
+ A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt) |
+ COND(color, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
+ COND(scissor_enable, A6XX_RB_2D_BLIT_CNTL_SCISSOR);
+
+ OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
+ OUT_RING(ring, blit_cntl);
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
+ OUT_RING(ring, blit_cntl);
+
+ if (fmt == FMT6_10_10_10_2_UNORM_DEST)
+ fmt = FMT6_16_16_16_16_FLOAT;
+
+ /* This register is probably badly named... it seems that it's
+ * controlling the internal/accumulator format or something like
+ * that. It's certainly not tied to only the src format.
+ */
+ OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1);
+ OUT_RING(
+ ring,
+ A6XX_SP_2D_DST_FORMAT_COLOR_FORMAT(fmt) |
+ COND(util_format_is_pure_sint(pfmt), A6XX_SP_2D_DST_FORMAT_SINT) |
+ COND(util_format_is_pure_uint(pfmt), A6XX_SP_2D_DST_FORMAT_UINT) |
+ COND(util_format_is_snorm(pfmt),
+ A6XX_SP_2D_DST_FORMAT_SINT | A6XX_SP_2D_DST_FORMAT_NORM) |
+ COND(util_format_is_unorm(pfmt),
+ // TODO sometimes blob uses UINT+NORM but dEQP seems unhappy
+ // about that:
+ // A6XX_SP_2D_DST_FORMAT_UINT |
+ A6XX_SP_2D_DST_FORMAT_NORM) |
+ COND(is_srgb, A6XX_SP_2D_DST_FORMAT_SRGB) |
+ A6XX_SP_2D_DST_FORMAT_MASK(0xf));
+
+ OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
+ OUT_RING(ring, 0);
}
/* buffers need to be handled specially since x/width can exceed the bounds
 * supported by the hw:
 */
static void
emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
- const struct pipe_blit_info *info)
+ const struct pipe_blit_info *info)
{
- const struct pipe_box *sbox = &info->src.box;
- const struct pipe_box *dbox = &info->dst.box;
- struct fd_resource *src, *dst;
- unsigned sshift, dshift;
-
- if (DEBUG_BLIT) {
- fprintf(stderr, "buffer blit: ");
- dump_blit_info(info);
- }
-
- src = fd_resource(info->src.resource);
- dst = fd_resource(info->dst.resource);
-
- debug_assert(src->layout.cpp == 1);
- debug_assert(dst->layout.cpp == 1);
- debug_assert(info->src.resource->format == info->dst.resource->format);
- debug_assert((sbox->y == 0) && (sbox->height == 1));
- debug_assert((dbox->y == 0) && (dbox->height == 1));
- debug_assert((sbox->z == 0) && (sbox->depth == 1));
- debug_assert((dbox->z == 0) && (dbox->depth == 1));
- debug_assert(sbox->width == dbox->width);
- debug_assert(info->src.level == 0);
- debug_assert(info->dst.level == 0);
-
- /*
- * Buffers can have dimensions bigger than max width, remap into
- * multiple 1d blits to fit within max dimension
- *
- * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
- * seems to prevent overfetch related faults. Not quite sure what
- * the deal is there.
- *
- * The low 6 bits of the SRC/DST addresses need to be zero (i.e. the
- * addresses must be 64-byte aligned), so we shift the src/dst x1/x2
- * coordinates to make up the difference, on top of already splitting
- * up the blit so the width isn't > 16k.
- *
- * We could perhaps do a bit better if src and dst are aligned, but
- * in the worst case this means we have to split the copy up into
- * chunks of 16k (0x4000) minus 64 (0x40) bytes.
- */
-
- sshift = sbox->x & 0x3f;
- dshift = dbox->x & 0x3f;
-
- emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, NULL);
-
- for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
- unsigned soff, doff, w, p;
-
- soff = (sbox->x + off) & ~0x3f;
- doff = (dbox->x + off) & ~0x3f;
-
- w = MIN2(sbox->width - off, (0x4000 - 0x40));
- p = align(w, 64);
-
- debug_assert((soff + w) <= fd_bo_size(src->bo));
- debug_assert((doff + w) <= fd_bo_size(dst->bo));
-
- /*
- * Emit source:
- */
- OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
- OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
- A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
- A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) |
- 0x500000);
- OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(sshift + w) |
- A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
- OUT_RELOC(ring, src->bo, soff, 0, 0); /* SP_PS_2D_SRC_LO/HI */
- OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(p));
-
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- /*
- * Emit destination:
- */
- OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
- OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
- A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
- A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
- OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
- OUT_RING(ring, A6XX_RB_2D_DST_PITCH(p));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- /*
- * Blit command:
- */
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
- OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(sshift));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(sshift + w - 1));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(0));
-
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
- OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dshift) | A6XX_GRAS_2D_DST_TL_Y(0));
- OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dshift + w - 1) | A6XX_GRAS_2D_DST_BR_Y(0));
-
- OUT_PKT7(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, 0x3f);
- OUT_WFI5(ring);
-
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
- OUT_RING(ring, ctx->screen->info.a6xx.magic.RB_UNKNOWN_8E04_blit);
-
- OUT_PKT7(ring, CP_BLIT, 1);
- OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
- OUT_WFI5(ring);
-
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
- OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
- }
+ const struct pipe_box *sbox = &info->src.box;
+ const struct pipe_box *dbox = &info->dst.box;
+ struct fd_resource *src, *dst;
+ unsigned sshift, dshift;
+
+ if (DEBUG_BLIT) {
+ fprintf(stderr, "buffer blit: ");
+ dump_blit_info(info);
+ }
+
+ src = fd_resource(info->src.resource);
+ dst = fd_resource(info->dst.resource);
+
+ debug_assert(src->layout.cpp == 1);
+ debug_assert(dst->layout.cpp == 1);
+ debug_assert(info->src.resource->format == info->dst.resource->format);
+ debug_assert((sbox->y == 0) && (sbox->height == 1));
+ debug_assert((dbox->y == 0) && (dbox->height == 1));
+ debug_assert((sbox->z == 0) && (sbox->depth == 1));
+ debug_assert((dbox->z == 0) && (dbox->depth == 1));
+ debug_assert(sbox->width == dbox->width);
+ debug_assert(info->src.level == 0);
+ debug_assert(info->dst.level == 0);
+
+ /*
+ * Buffers can have dimensions bigger than max width, remap into
+ * multiple 1d blits to fit within max dimension
+ *
+ * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
+ * seems to prevent overfetch-related faults. Not quite sure what
+ * the deal is there.
+ *
+ * The low 6 bits of the SRC/DST addresses need to be zero (i.e. the
+ * addresses must be 64-byte aligned), so we shift the src/dst x1/x2
+ * coordinates to make up the difference, on top of already splitting
+ * up the blit so the width isn't > 16k.
+ *
+ * We could perhaps do a bit better if src and dst are aligned, but
+ * in the worst case this means we have to split the copy up into
+ * chunks of 16k (0x4000) minus 64 (0x40) bytes.
+ */
+
+ sshift = sbox->x & 0x3f;
+ dshift = dbox->x & 0x3f;
+
+ emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, NULL);
+
+ for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
+ unsigned soff, doff, w, p;
+
+ soff = (sbox->x + off) & ~0x3f;
+ doff = (dbox->x + off) & ~0x3f;
+
+ w = MIN2(sbox->width - off, (0x4000 - 0x40));
+ p = align(w, 64);
+
+ debug_assert((soff + w) <= fd_bo_size(src->bo));
+ debug_assert((doff + w) <= fd_bo_size(dst->bo));
+
+ /*
+ * Emit source:
+ */
+ OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
+ OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
+ A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
+ A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
+ OUT_RING(ring,
+ A6XX_SP_PS_2D_SRC_SIZE_WIDTH(sshift + w) |
+ A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
+ OUT_RELOC(ring, src->bo, soff, 0, 0); /* SP_PS_2D_SRC_LO/HI */
+ OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(p));
+
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ /*
+ * Emit destination:
+ */
+ OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
+ OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
+ A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+ A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
+ OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
+ OUT_RING(ring, A6XX_RB_2D_DST_PITCH(p));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ /*
+ * Blit command:
+ */
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(sshift));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(sshift + w - 1));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(0));
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
+ OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dshift) | A6XX_GRAS_2D_DST_TL_Y(0));
+ OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dshift + w - 1) |
+ A6XX_GRAS_2D_DST_BR_Y(0));
+
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, 0x3f);
+ OUT_WFI5(ring);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+ OUT_RING(ring, ctx->screen->info.a6xx.magic.RB_UNKNOWN_8E04_blit);
+
+ OUT_PKT7(ring, CP_BLIT, 1);
+ OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+ OUT_WFI5(ring);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+ OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
+ }
}
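To make the chunking in emit_blit_buffer() concrete: the loop advances in steps of 0x4000 - 0x40 = 16320 bytes (a multiple of 64, so the shifts stay constant), rounds each base address down to 64 bytes, and re-applies the remainder through the X coordinates. A standalone sketch of just the address math, printing instead of emitting packets:

#include <stdio.h>

#define CHUNK (0x4000 - 0x40) /* max 2D width minus the 64-byte slack */

/* Reproduce the per-chunk address math from emit_blit_buffer() for a
 * copy of 'width' bytes at byte offsets sx (src) and dx (dst).
 */
static void
plan_buffer_blit(unsigned sx, unsigned dx, unsigned width)
{
   unsigned sshift = sx & 0x3f;
   unsigned dshift = dx & 0x3f;

   for (unsigned off = 0; off < width; off += CHUNK) {
      unsigned soff = (sx + off) & ~0x3f;
      unsigned doff = (dx + off) & ~0x3f;
      unsigned w = (width - off < CHUNK) ? width - off : CHUNK;
      unsigned p = (w + 63) & ~0x3f; /* align(w, 64) */

      printf("src +%u x %u..%u, dst +%u x %u..%u, pitch %u\n",
             soff, sshift, sshift + w - 1, doff, dshift, dshift + w - 1, p);
   }
}

A 40000-byte copy from src.x = 100 splits into three blits: two of 16320 bytes and one of 7360.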
static void
-fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc)
- assert_dt
+fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
{
- struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
- union pipe_color_union color = {};
-
- emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, &color);
-
- OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
- OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(0));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(0));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(0));
-
- unsigned size = rsc->layout.slices[0].offset;
- unsigned offset = 0;
-
- /* We could be more clever here and realize that we could use a
- * larger width if the size is aligned to something more than a
- * single page.. or even use a format larger than r8 in those
- * cases. But for normal sized textures and even up to 16k x 16k
- * at <= 4 bytes/pixel, we'll only go through the loop once.
- */
- const unsigned w = 0x1000;
-
- /* ubwc size should always be page aligned: */
- assert((size % w) == 0);
-
- while (size > 0) {
- const unsigned h = MIN2(0x4000, size / w);
- /* width is already aligned to a suitable pitch: */
- const unsigned p = w;
-
- /*
- * Emit destination:
- */
- OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
- OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
- A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
- A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
- OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_2D_DST_LO/HI */
- OUT_RING(ring, A6XX_RB_2D_DST_PITCH(p));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- /*
- * Blit command:
- */
-
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
- OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
- OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(w - 1) | A6XX_GRAS_2D_DST_BR_Y(h - 1));
-
- OUT_PKT7(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, 0x3f);
- OUT_WFI5(ring);
-
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
- OUT_RING(ring, batch->ctx->screen->info.a6xx.magic.RB_UNKNOWN_8E04_blit);
-
- OUT_PKT7(ring, CP_BLIT, 1);
- OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
- OUT_WFI5(ring);
-
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
- OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
-
- offset += w * h;
- size -= w * h;
- }
-
- fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
- fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
- fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
- fd6_cache_inv(batch, ring);
+ struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
+ union pipe_color_union color = {};
+
+ emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, &color);
+
+ OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(0));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(0));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(0));
+
+ unsigned size = rsc->layout.slices[0].offset;
+ unsigned offset = 0;
+
+ /* We could be more clever here and realize that we could use a
+ * larger width if the size is aligned to something more than a
+ * single page.. or even use a format larger than r8 in those
+ * cases. But for normal sized textures and even up to 16k x 16k
+ * at <= 4 bytes/pixel, we'll only go through the loop once.
+ */
+ const unsigned w = 0x1000;
+
+ /* ubwc size should always be page aligned: */
+ assert((size % w) == 0);
+
+ while (size > 0) {
+ const unsigned h = MIN2(0x4000, size / w);
+ /* width is already aligned to a suitable pitch: */
+ const unsigned p = w;
+
+ /*
+ * Emit destination:
+ */
+ OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
+ OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
+ A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+ A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
+ OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_2D_DST_LO/HI */
+ OUT_RING(ring, A6XX_RB_2D_DST_PITCH(p));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ /*
+ * Blit command:
+ */
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
+ OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
+ OUT_RING(ring,
+ A6XX_GRAS_2D_DST_BR_X(w - 1) | A6XX_GRAS_2D_DST_BR_Y(h - 1));
+
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, 0x3f);
+ OUT_WFI5(ring);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+ OUT_RING(ring, batch->ctx->screen->info.a6xx.magic.RB_UNKNOWN_8E04_blit);
+
+ OUT_PKT7(ring, CP_BLIT, 1);
+ OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+ OUT_WFI5(ring);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+ OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
+
+ offset += w * h;
+ size -= w * h;
+ }
+
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
+ fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
+ fd6_cache_inv(batch, ring);
}
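The clear loop in fd6_clear_ubwc() walks the UBWC flags section in page-wide R8 strips: w is fixed at 0x1000 bytes and h covers up to 0x4000 rows per pass, so up to 64 MiB of flags data clears in a single CP_BLIT. A standalone sketch of the strip sizing (assuming size is page-aligned, as the assert requires):

#include <stdio.h>

/* Strip sizing from fd6_clear_ubwc(): each pass clears a w x h R8
 * rectangle, i.e. w * h bytes of flags data.
 */
static void
plan_ubwc_clear(unsigned size) /* bytes, multiple of 0x1000 */
{
   const unsigned w = 0x1000; /* one page per row */

   while (size > 0) {
      unsigned h = size / w;
      if (h > 0x4000)
         h = 0x4000; /* MIN2(0x4000, size / w) */
      printf("clear %ux%u (%u bytes)\n", w, h, w * h);
      size -= w * h;
   }
}

A 64 KiB flags section, for example, is a single 4096x16 blit.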
static void
-emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc, enum pipe_format pfmt, unsigned level, unsigned layer)
+emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc,
+ enum pipe_format pfmt, unsigned level, unsigned layer)
{
- struct fd_resource *dst = fd_resource(prsc);
- enum a6xx_format fmt = fd6_pipe2color(pfmt);
- enum a6xx_tile_mode tile = fd_resource_tile_mode(prsc, level);
- enum a3xx_color_swap swap = fd6_resource_swap(dst, pfmt);
- uint32_t pitch = fd_resource_pitch(dst, level);
- bool ubwc_enabled = fd_resource_ubwc_enabled(dst, level);
- unsigned off = fd_resource_offset(dst, level, layer);
-
- if (fmt == FMT6_Z24_UNORM_S8_UINT)
- fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
-
- OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
- OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(fmt) |
- A6XX_RB_2D_DST_INFO_TILE_MODE(tile) |
- A6XX_RB_2D_DST_INFO_COLOR_SWAP(swap) |
- COND(util_format_is_srgb(pfmt), A6XX_RB_2D_DST_INFO_SRGB) |
- COND(ubwc_enabled, A6XX_RB_2D_DST_INFO_FLAGS));
- OUT_RELOC(ring, dst->bo, off, 0, 0); /* RB_2D_DST_LO/HI */
- OUT_RING(ring, A6XX_RB_2D_DST_PITCH(pitch));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- if (ubwc_enabled) {
- OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS, 6);
- fd6_emit_flag_reference(ring, dst, level, layer);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
+ struct fd_resource *dst = fd_resource(prsc);
+ enum a6xx_format fmt = fd6_pipe2color(pfmt);
+ enum a6xx_tile_mode tile = fd_resource_tile_mode(prsc, level);
+ enum a3xx_color_swap swap = fd6_resource_swap(dst, pfmt);
+ uint32_t pitch = fd_resource_pitch(dst, level);
+ bool ubwc_enabled = fd_resource_ubwc_enabled(dst, level);
+ unsigned off = fd_resource_offset(dst, level, layer);
+
+ if (fmt == FMT6_Z24_UNORM_S8_UINT)
+ fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+
+ OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
+ OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(fmt) |
+ A6XX_RB_2D_DST_INFO_TILE_MODE(tile) |
+ A6XX_RB_2D_DST_INFO_COLOR_SWAP(swap) |
+ COND(util_format_is_srgb(pfmt), A6XX_RB_2D_DST_INFO_SRGB) |
+ COND(ubwc_enabled, A6XX_RB_2D_DST_INFO_FLAGS));
+ OUT_RELOC(ring, dst->bo, off, 0, 0); /* RB_2D_DST_LO/HI */
+ OUT_RING(ring, A6XX_RB_2D_DST_PITCH(pitch));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ if (ubwc_enabled) {
+ OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS, 6);
+ fd6_emit_flag_reference(ring, dst, level, layer);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
}
static void
-emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info, unsigned layer, unsigned nr_samples)
+emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
+ unsigned layer, unsigned nr_samples)
{
- struct fd_resource *src = fd_resource(info->src.resource);
- enum a6xx_format sfmt = fd6_pipe2color(info->src.format);
- enum a6xx_tile_mode stile = fd_resource_tile_mode(info->src.resource, info->src.level);
- enum a3xx_color_swap sswap = fd6_resource_swap(src, info->src.format);
- uint32_t pitch = fd_resource_pitch(src, info->src.level);
- bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
- unsigned soff = fd_resource_offset(src, info->src.level, layer);
- uint32_t width = u_minify(src->b.b.width0, info->src.level) * nr_samples;
- uint32_t height = u_minify(src->b.b.height0, info->src.level);
- uint32_t filter = 0;
-
- if (info->filter == PIPE_TEX_FILTER_LINEAR)
- filter = A6XX_SP_PS_2D_SRC_INFO_FILTER;
-
- enum a3xx_msaa_samples samples = fd_msaa_samples(src->b.b.nr_samples);
-
- if (sfmt == FMT6_10_10_10_2_UNORM_DEST)
- sfmt = FMT6_10_10_10_2_UNORM;
-
- OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
- OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
- A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(stile) |
- A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(sswap) |
- A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
- COND(samples > MSAA_ONE && (info->mask & PIPE_MASK_RGBA),
- A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
- COND(subwc_enabled, A6XX_SP_PS_2D_SRC_INFO_FLAGS) |
- COND(util_format_is_srgb(info->src.format), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
- 0x500000 | filter);
- OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(width) |
- A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(height)); /* SP_PS_2D_SRC_SIZE */
- OUT_RELOC(ring, src->bo, soff, 0, 0); /* SP_PS_2D_SRC_LO/HI */
- OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch));
-
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- if (subwc_enabled) {
- OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_FLAGS, 6);
- fd6_emit_flag_reference(ring, src, info->src.level, layer);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
+ struct fd_resource *src = fd_resource(info->src.resource);
+ enum a6xx_format sfmt = fd6_pipe2color(info->src.format);
+ enum a6xx_tile_mode stile =
+ fd_resource_tile_mode(info->src.resource, info->src.level);
+ enum a3xx_color_swap sswap = fd6_resource_swap(src, info->src.format);
+ uint32_t pitch = fd_resource_pitch(src, info->src.level);
+ bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
+ unsigned soff = fd_resource_offset(src, info->src.level, layer);
+ uint32_t width = u_minify(src->b.b.width0, info->src.level) * nr_samples;
+ uint32_t height = u_minify(src->b.b.height0, info->src.level);
+ uint32_t filter = 0;
+
+ if (info->filter == PIPE_TEX_FILTER_LINEAR)
+ filter = A6XX_SP_PS_2D_SRC_INFO_FILTER;
+
+ enum a3xx_msaa_samples samples = fd_msaa_samples(src->b.b.nr_samples);
+
+ if (sfmt == FMT6_10_10_10_2_UNORM_DEST)
+ sfmt = FMT6_10_10_10_2_UNORM;
+
+ OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
+ OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
+ A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(stile) |
+ A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(sswap) |
+ A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
+ COND(samples > MSAA_ONE && (info->mask & PIPE_MASK_RGBA),
+ A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
+ COND(subwc_enabled, A6XX_SP_PS_2D_SRC_INFO_FLAGS) |
+ COND(util_format_is_srgb(info->src.format),
+ A6XX_SP_PS_2D_SRC_INFO_SRGB) |
+ 0x500000 | filter);
+ OUT_RING(ring,
+ A6XX_SP_PS_2D_SRC_SIZE_WIDTH(width) |
+ A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(height)); /* SP_PS_2D_SRC_SIZE */
+ OUT_RELOC(ring, src->bo, soff, 0, 0); /* SP_PS_2D_SRC_LO/HI */
+ OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch));
+
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ if (subwc_enabled) {
+ OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_FLAGS, 6);
+ fd6_emit_flag_reference(ring, src, info->src.level, layer);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
}
static void
-emit_blit_texture(struct fd_context *ctx,
- struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
+emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ const struct pipe_blit_info *info)
{
- const struct pipe_box *sbox = &info->src.box;
- const struct pipe_box *dbox = &info->dst.box;
- struct fd_resource *dst;
- int sx1, sy1, sx2, sy2;
- int dx1, dy1, dx2, dy2;
-
- if (DEBUG_BLIT) {
- fprintf(stderr, "texture blit: ");
- dump_blit_info(info);
- }
-
- dst = fd_resource(info->dst.resource);
-
- uint32_t nr_samples = fd_resource_nr_samples(&dst->b.b);
-
- sx1 = sbox->x * nr_samples;
- sy1 = sbox->y;
- sx2 = (sbox->x + sbox->width) * nr_samples - 1;
- sy2 = sbox->y + sbox->height - 1;
-
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
- OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(sx1));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(sx2));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(sy1));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(sy2));
-
- dx1 = dbox->x * nr_samples;
- dy1 = dbox->y;
- dx2 = (dbox->x + dbox->width) * nr_samples - 1;
- dy2 = dbox->y + dbox->height - 1;
-
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
- OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dx1) | A6XX_GRAS_2D_DST_TL_Y(dy1));
- OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dx2) | A6XX_GRAS_2D_DST_BR_Y(dy2));
-
- if (info->scissor_enable) {
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_RESOLVE_CNTL_1, 2);
- OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.minx) |
- A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.miny));
- OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.maxx - 1) |
- A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1));
- }
-
- emit_blit_setup(ring, info->dst.format, info->scissor_enable, NULL);
-
- for (unsigned i = 0; i < info->dst.box.depth; i++) {
-
- emit_blit_src(ring, info, sbox->z + i, nr_samples);
- emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level, dbox->z + i);
-
- /*
- * Blit command:
- */
- OUT_PKT7(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, 0x3f);
- OUT_WFI5(ring);
-
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
- OUT_RING(ring, ctx->screen->info.a6xx.magic.RB_UNKNOWN_8E04_blit);
-
- OUT_PKT7(ring, CP_BLIT, 1);
- OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
- OUT_WFI5(ring);
-
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
- OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
- }
+ const struct pipe_box *sbox = &info->src.box;
+ const struct pipe_box *dbox = &info->dst.box;
+ struct fd_resource *dst;
+ int sx1, sy1, sx2, sy2;
+ int dx1, dy1, dx2, dy2;
+
+ if (DEBUG_BLIT) {
+ fprintf(stderr, "texture blit: ");
+ dump_blit_info(info);
+ }
+
+ dst = fd_resource(info->dst.resource);
+
+ uint32_t nr_samples = fd_resource_nr_samples(&dst->b.b);
+
+ sx1 = sbox->x * nr_samples;
+ sy1 = sbox->y;
+ sx2 = (sbox->x + sbox->width) * nr_samples - 1;
+ sy2 = sbox->y + sbox->height - 1;
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(sx1));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(sx2));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(sy1));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(sy2));
+
+ dx1 = dbox->x * nr_samples;
+ dy1 = dbox->y;
+ dx2 = (dbox->x + dbox->width) * nr_samples - 1;
+ dy2 = dbox->y + dbox->height - 1;
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
+ OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dx1) | A6XX_GRAS_2D_DST_TL_Y(dy1));
+ OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dx2) | A6XX_GRAS_2D_DST_BR_Y(dy2));
+
+ if (info->scissor_enable) {
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_RESOLVE_CNTL_1, 2);
+ OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.minx) |
+ A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.miny));
+ OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.maxx - 1) |
+ A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1));
+ }
+
+ emit_blit_setup(ring, info->dst.format, info->scissor_enable, NULL);
+
+ for (unsigned i = 0; i < info->dst.box.depth; i++) {
+
+ emit_blit_src(ring, info, sbox->z + i, nr_samples);
+ emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level,
+ dbox->z + i);
+
+ /*
+ * Blit command:
+ */
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, 0x3f);
+ OUT_WFI5(ring);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+ OUT_RING(ring, ctx->screen->info.a6xx.magic.RB_UNKNOWN_8E04_blit);
+
+ OUT_PKT7(ring, CP_BLIT, 1);
+ OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+ OUT_WFI5(ring);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+ OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
+ }
}
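
The rectangle setup above uses inclusive bottom-right coordinates and scales only the x axis by the sample count (matching the width scaling in emit_blit_src()). A standalone sketch of that arithmetic, with illustrative values and no driver dependencies:

#include <stdio.h>

int
main(void)
{
   unsigned nr_samples = 4; /* e.g. 4x MSAA destination (illustrative) */
   int x = 8, y = 2, width = 16, height = 4;

   int x1 = x * nr_samples;               /* 32 */
   int y1 = y;                            /*  2 */
   int x2 = (x + width) * nr_samples - 1; /* 95, inclusive */
   int y2 = y + height - 1;               /*  5, inclusive */

   printf("TL=(%d,%d) BR=(%d,%d)\n", x1, y1, x2, y2);
   return 0;
}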
static void
-emit_clear_color(struct fd_ringbuffer *ring,
- enum pipe_format pfmt, union pipe_color_union *color)
+emit_clear_color(struct fd_ringbuffer *ring, enum pipe_format pfmt,
+ union pipe_color_union *color)
{
- switch (pfmt) {
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- case PIPE_FORMAT_X24S8_UINT: {
- uint32_t depth_unorm24 = color->f[0] * ((1u << 24) - 1);
- uint8_t stencil = color->ui[1];
- color->ui[0] = depth_unorm24 & 0xff;
- color->ui[1] = (depth_unorm24 >> 8) & 0xff;
- color->ui[2] = (depth_unorm24 >> 16) & 0xff;
- color->ui[3] = stencil;
- break;
- }
- default:
- break;
- }
-
- OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
- switch (fd6_ifmt(fd6_pipe2color(pfmt))) {
- case R2D_UNORM8:
- case R2D_UNORM8_SRGB:
- /* The r2d ifmt is badly named, it also covers the signed case: */
- if (util_format_is_snorm(pfmt)) {
- OUT_RING(ring, float_to_byte_tex(color->f[0]));
- OUT_RING(ring, float_to_byte_tex(color->f[1]));
- OUT_RING(ring, float_to_byte_tex(color->f[2]));
- OUT_RING(ring, float_to_byte_tex(color->f[3]));
- } else {
- OUT_RING(ring, float_to_ubyte(color->f[0]));
- OUT_RING(ring, float_to_ubyte(color->f[1]));
- OUT_RING(ring, float_to_ubyte(color->f[2]));
- OUT_RING(ring, float_to_ubyte(color->f[3]));
- }
- break;
- case R2D_FLOAT16:
- OUT_RING(ring, _mesa_float_to_half(color->f[0]));
- OUT_RING(ring, _mesa_float_to_half(color->f[1]));
- OUT_RING(ring, _mesa_float_to_half(color->f[2]));
- OUT_RING(ring, _mesa_float_to_half(color->f[3]));
- break;
- case R2D_FLOAT32:
- case R2D_INT32:
- case R2D_INT16:
- case R2D_INT8:
- default:
- OUT_RING(ring, color->ui[0]);
- OUT_RING(ring, color->ui[1]);
- OUT_RING(ring, color->ui[2]);
- OUT_RING(ring, color->ui[3]);
- break;
- }
+ switch (pfmt) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_X24S8_UINT: {
+ uint32_t depth_unorm24 = color->f[0] * ((1u << 24) - 1);
+ uint8_t stencil = color->ui[1];
+ color->ui[0] = depth_unorm24 & 0xff;
+ color->ui[1] = (depth_unorm24 >> 8) & 0xff;
+ color->ui[2] = (depth_unorm24 >> 16) & 0xff;
+ color->ui[3] = stencil;
+ break;
+ }
+ default:
+ break;
+ }
+
+ OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
+ switch (fd6_ifmt(fd6_pipe2color(pfmt))) {
+ case R2D_UNORM8:
+ case R2D_UNORM8_SRGB:
+ /* The r2d ifmt is badly named, it also covers the signed case: */
+ if (util_format_is_snorm(pfmt)) {
+ OUT_RING(ring, float_to_byte_tex(color->f[0]));
+ OUT_RING(ring, float_to_byte_tex(color->f[1]));
+ OUT_RING(ring, float_to_byte_tex(color->f[2]));
+ OUT_RING(ring, float_to_byte_tex(color->f[3]));
+ } else {
+ OUT_RING(ring, float_to_ubyte(color->f[0]));
+ OUT_RING(ring, float_to_ubyte(color->f[1]));
+ OUT_RING(ring, float_to_ubyte(color->f[2]));
+ OUT_RING(ring, float_to_ubyte(color->f[3]));
+ }
+ break;
+ case R2D_FLOAT16:
+ OUT_RING(ring, _mesa_float_to_half(color->f[0]));
+ OUT_RING(ring, _mesa_float_to_half(color->f[1]));
+ OUT_RING(ring, _mesa_float_to_half(color->f[2]));
+ OUT_RING(ring, _mesa_float_to_half(color->f[3]));
+ break;
+ case R2D_FLOAT32:
+ case R2D_INT32:
+ case R2D_INT16:
+ case R2D_INT8:
+ default:
+ OUT_RING(ring, color->ui[0]);
+ OUT_RING(ring, color->ui[1]);
+ OUT_RING(ring, color->ui[2]);
+ OUT_RING(ring, color->ui[3]);
+ break;
+ }
}
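
For the Z24/S8 formats above, the clear value is rewritten so each color channel carries one byte of the 24-bit unorm depth, with stencil in the fourth channel. A minimal sketch of just that packing, with illustrative values:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   float depth = 0.5f;
   uint8_t stencil = 0xaa;

   uint32_t depth_unorm24 = depth * ((1u << 24) - 1); /* 0x7fffff */
   uint32_t c[4] = {
      depth_unorm24 & 0xff,         /* R: depth bits  0..7  */
      (depth_unorm24 >> 8) & 0xff,  /* G: depth bits  8..15 */
      (depth_unorm24 >> 16) & 0xff, /* B: depth bits 16..23 */
      stencil,                      /* A: stencil           */
   };

   printf("%02x %02x %02x %02x\n", c[0], c[1], c[2], c[3]);
   return 0;
}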
/**
static union pipe_color_union
convert_color(enum pipe_format format, union pipe_color_union *pcolor)
{
- union pipe_color_union color = *pcolor;
+ union pipe_color_union color = *pcolor;
- /* For solid-fill blits, the hw isn't going to convert from
- * linear to srgb for us:
- */
- if (util_format_is_srgb(format)) {
- for (int i = 0; i < 3; i++)
- color.f[i] = util_format_linear_to_srgb_float(color.f[i]);
- }
+ /* For solid-fill blits, the hw isn't going to convert from
+ * linear to srgb for us:
+ */
+ if (util_format_is_srgb(format)) {
+ for (int i = 0; i < 3; i++)
+ color.f[i] = util_format_linear_to_srgb_float(color.f[i]);
+ }
- if (util_format_is_snorm(format)) {
- for (int i = 0; i < 3; i++)
- color.f[i] = CLAMP(color.f[i], -1.0f, 1.0f);
- }
+ if (util_format_is_snorm(format)) {
+ for (int i = 0; i < 3; i++)
+ color.f[i] = CLAMP(color.f[i], -1.0f, 1.0f);
+ }
- /* Note that float_to_ubyte() already clamps, for the unorm case */
+ /* Note that float_to_ubyte() already clamps, for the unorm case */
- return color;
+ return color;
}
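
The linear-to-sRGB step above goes through mesa's util_format_linear_to_srgb_float(); a self-contained approximation using the standard piecewise sRGB encode curve (precision details may differ from mesa's helper):

#include <math.h>
#include <stdio.h>

static float
linear_to_srgb(float l)
{
   /* standard sRGB encode: linear segment near zero, gamma elsewhere */
   return (l <= 0.0031308f) ? 12.92f * l
                            : 1.055f * powf(l, 1.0f / 2.4f) - 0.055f;
}

int
main(void)
{
   printf("0.5 linear -> %f srgb\n", linear_to_srgb(0.5f)); /* ~0.7354 */
   return 0; /* link with -lm */
}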
void
-fd6_clear_surface(struct fd_context *ctx,
- struct fd_ringbuffer *ring, struct pipe_surface *psurf,
- uint32_t width, uint32_t height, union pipe_color_union *color)
+fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ struct pipe_surface *psurf, uint32_t width, uint32_t height,
+ union pipe_color_union *color)
{
- if (DEBUG_BLIT) {
- fprintf(stderr, "surface clear:\ndst resource: ");
- util_dump_resource(stderr, psurf->texture);
- fprintf(stderr, "\n");
- }
+ if (DEBUG_BLIT) {
+ fprintf(stderr, "surface clear:\ndst resource: ");
+ util_dump_resource(stderr, psurf->texture);
+ fprintf(stderr, "\n");
+ }
- uint32_t nr_samples = fd_resource_nr_samples(psurf->texture);
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
- OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
- OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(width * nr_samples - 1) |
- A6XX_GRAS_2D_DST_BR_Y(height - 1));
+ uint32_t nr_samples = fd_resource_nr_samples(psurf->texture);
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
+ OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
+ OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(width * nr_samples - 1) |
+ A6XX_GRAS_2D_DST_BR_Y(height - 1));
- union pipe_color_union clear_color = convert_color(psurf->format, color);
+ union pipe_color_union clear_color = convert_color(psurf->format, color);
- emit_clear_color(ring, psurf->format, &clear_color);
- emit_blit_setup(ring, psurf->format, false, &clear_color);
+ emit_clear_color(ring, psurf->format, &clear_color);
+ emit_blit_setup(ring, psurf->format, false, &clear_color);
- for (unsigned i = psurf->u.tex.first_layer; i <= psurf->u.tex.last_layer; i++) {
- emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level, i);
+ for (unsigned i = psurf->u.tex.first_layer; i <= psurf->u.tex.last_layer;
+ i++) {
+ emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level, i);
- /*
- * Blit command:
- */
- OUT_PKT7(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, 0x3f);
- OUT_WFI5(ring);
+ /*
+ * Blit command:
+ */
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, 0x3f);
+ OUT_WFI5(ring);
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
- OUT_RING(ring, ctx->screen->info.a6xx.magic.RB_UNKNOWN_8E04_blit);
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+ OUT_RING(ring, ctx->screen->info.a6xx.magic.RB_UNKNOWN_8E04_blit);
- OUT_PKT7(ring, CP_BLIT, 1);
- OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
+ OUT_PKT7(ring, CP_BLIT, 1);
+ OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
- OUT_WFI5(ring);
+ OUT_WFI5(ring);
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
- OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
- }
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+ OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
+ }
}
void
fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
- uint32_t base, struct pipe_surface *psurf)
+ uint32_t base, struct pipe_surface *psurf)
{
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- uint64_t gmem_base = batch->ctx->screen->gmem_base + base;
- uint32_t gmem_pitch = gmem->bin_w * batch->framebuffer.samples *
- util_format_get_blocksize(psurf->format);
-
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
- OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
- OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(psurf->width - 1) |
- A6XX_GRAS_2D_DST_BR_Y(psurf->height - 1));
-
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
- OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(0));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(psurf->width - 1));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
- OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(psurf->height - 1));
-
- /* Enable scissor bit, which will take into account the window scissor
- * which is set per-tile
- */
- emit_blit_setup(ring, psurf->format, true, NULL);
-
- /* We shouldn't be using GMEM in the layered rendering case: */
- assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
-
- emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level,
- psurf->u.tex.first_layer);
-
- enum a6xx_format sfmt = fd6_pipe2color(psurf->format);
- enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples);
-
- OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
- OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
- A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_2) |
- A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
- COND(samples > MSAA_ONE, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
- COND(util_format_is_srgb(psurf->format), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
- A6XX_SP_PS_2D_SRC_INFO_UNK20 |
- A6XX_SP_PS_2D_SRC_INFO_UNK22);
- OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(psurf->width) |
- A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(psurf->height));
- OUT_RING(ring, gmem_base); /* SP_PS_2D_SRC_LO */
- OUT_RING(ring, gmem_base >> 32); /* SP_PS_2D_SRC_HI */
- OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(gmem_pitch));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- /* sync GMEM writes with CACHE. */
- fd6_cache_inv(batch, ring);
-
- /* Wait for CACHE_INVALIDATE to land */
- fd_wfi(batch, ring);
-
- OUT_PKT7(ring, CP_BLIT, 1);
- OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
- OUT_WFI5(ring);
-
- /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
- * sysmem, and we generally assume that GMEM renderpasses leave their
- * results in sysmem, so we need to flush manually here.
- */
- fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ uint64_t gmem_base = batch->ctx->screen->gmem_base + base;
+ uint32_t gmem_pitch = gmem->bin_w * batch->framebuffer.samples *
+ util_format_get_blocksize(psurf->format);
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
+ OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
+ OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(psurf->width - 1) |
+ A6XX_GRAS_2D_DST_BR_Y(psurf->height - 1));
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(0));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(psurf->width - 1));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
+ OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(psurf->height - 1));
+
+ /* Enable scissor bit, which will take into account the window scissor
+ * which is set per-tile
+ */
+ emit_blit_setup(ring, psurf->format, true, NULL);
+
+ /* We shouldn't be using GMEM in the layered rendering case: */
+ assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+ emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level,
+ psurf->u.tex.first_layer);
+
+ enum a6xx_format sfmt = fd6_pipe2color(psurf->format);
+ enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples);
+
+ OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
+ OUT_RING(
+ ring,
+ A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
+ A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_2) |
+ A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
+ COND(samples > MSAA_ONE, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
+ COND(util_format_is_srgb(psurf->format), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
+ A6XX_SP_PS_2D_SRC_INFO_UNK20 | A6XX_SP_PS_2D_SRC_INFO_UNK22);
+ OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(psurf->width) |
+ A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(psurf->height));
+ OUT_RING(ring, gmem_base); /* SP_PS_2D_SRC_LO */
+ OUT_RING(ring, gmem_base >> 32); /* SP_PS_2D_SRC_HI */
+ OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(gmem_pitch));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ /* sync GMEM writes with CACHE. */
+ fd6_cache_inv(batch, ring);
+
+ /* Wait for CACHE_INVALIDATE to land */
+ fd_wfi(batch, ring);
+
+ OUT_PKT7(ring, CP_BLIT, 1);
+ OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+ OUT_WFI5(ring);
+
+ /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
+ * sysmem, and we generally assume that GMEM renderpasses leave their
+ * results in sysmem, so we need to flush manually here.
+ */
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
}
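
The GMEM pitch above is the bin width times sample count times bytes per pixel. A sketch with illustrative numbers (RGBA8, 2x MSAA, a 256-pixel-wide bin):

#include <stdio.h>

int
main(void)
{
   unsigned bin_w = 256;   /* bin width in pixels (illustrative) */
   unsigned samples = 2;   /* framebuffer samples */
   unsigned blocksize = 4; /* bytes per pixel, e.g. RGBA8 */

   unsigned gmem_pitch = bin_w * samples * blocksize;
   printf("gmem_pitch = %u bytes\n", gmem_pitch); /* 2048 */
   return 0;
}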
static bool
-handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
- assert_dt
+handle_rgba_blit(struct fd_context *ctx,
+ const struct pipe_blit_info *info) assert_dt
{
- struct fd_batch *batch;
+ struct fd_batch *batch;
- debug_assert(!(info->mask & PIPE_MASK_ZS));
+ debug_assert(!(info->mask & PIPE_MASK_ZS));
- if (!can_do_blit(info))
- return false;
+ if (!can_do_blit(info))
+ return false;
- batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
+ batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
- fd_screen_lock(ctx->screen);
+ fd_screen_lock(ctx->screen);
- fd_batch_resource_read(batch, fd_resource(info->src.resource));
- fd_batch_resource_write(batch, fd_resource(info->dst.resource));
+ fd_batch_resource_read(batch, fd_resource(info->src.resource));
+ fd_batch_resource_write(batch, fd_resource(info->dst.resource));
- fd_screen_unlock(ctx->screen);
+ fd_screen_unlock(ctx->screen);
- ASSERTED bool ret = fd_batch_lock_submit(batch);
- assert(ret);
+ ASSERTED bool ret = fd_batch_lock_submit(batch);
+ assert(ret);
- /* Clearing last_fence must come after the batch dependency tracking
- * (resource_read()/resource_write()), as that can trigger a flush,
- * re-populating last_fence
- */
- fd_fence_ref(&ctx->last_fence, NULL);
+ /* Clearing last_fence must come after the batch dependency tracking
+ * (resource_read()/resource_write()), as that can trigger a flush,
+ * re-populating last_fence
+ */
+ fd_fence_ref(&ctx->last_fence, NULL);
- fd_batch_update_queries(batch);
+ fd_batch_update_queries(batch);
- emit_setup(batch);
+ emit_setup(batch);
- trace_start_blit(&batch->trace, info->src.resource->target, info->dst.resource->target);
+ trace_start_blit(&batch->trace, info->src.resource->target,
+ info->dst.resource->target);
- if ((info->src.resource->target == PIPE_BUFFER) &&
- (info->dst.resource->target == PIPE_BUFFER)) {
- assert(fd_resource(info->src.resource)->layout.tile_mode == TILE6_LINEAR);
- assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE6_LINEAR);
- emit_blit_buffer(ctx, batch->draw, info);
- } else {
- /* I don't *think* we need to handle blits between buffer <-> !buffer */
- debug_assert(info->src.resource->target != PIPE_BUFFER);
- debug_assert(info->dst.resource->target != PIPE_BUFFER);
- emit_blit_texture(ctx, batch->draw, info);
- }
+ if ((info->src.resource->target == PIPE_BUFFER) &&
+ (info->dst.resource->target == PIPE_BUFFER)) {
+ assert(fd_resource(info->src.resource)->layout.tile_mode == TILE6_LINEAR);
+ assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE6_LINEAR);
+ emit_blit_buffer(ctx, batch->draw, info);
+ } else {
+ /* I don't *think* we need to handle blits between buffer <-> !buffer */
+ debug_assert(info->src.resource->target != PIPE_BUFFER);
+ debug_assert(info->dst.resource->target != PIPE_BUFFER);
+ emit_blit_texture(ctx, batch->draw, info);
+ }
- trace_end_blit(&batch->trace);
+ trace_end_blit(&batch->trace);
- fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_COLOR_TS, true);
- fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_DEPTH_TS, true);
- fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
- fd6_cache_inv(batch, batch->draw);
+ fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_COLOR_TS, true);
+ fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_DEPTH_TS, true);
+ fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
+ fd6_cache_inv(batch, batch->draw);
- fd_batch_unlock_submit(batch);
+ fd_batch_unlock_submit(batch);
- fd_resource(info->dst.resource)->valid = true;
- batch->needs_flush = true;
+ fd_resource(info->dst.resource)->valid = true;
+ batch->needs_flush = true;
- fd_batch_flush(batch);
- fd_batch_reference(&batch, NULL);
+ fd_batch_flush(batch);
+ fd_batch_reference(&batch, NULL);
- /* Acc query state will have been dirtied by our fd_batch_update_queries, so
- * the ctx->batch may need to turn its queries back on.
- */
- ctx->update_active_queries = true;
+ /* Acc query state will have been dirtied by our fd_batch_update_queries, so
+ * the ctx->batch may need to turn its queries back on.
+ */
+ ctx->update_active_queries = true;
- return true;
+ return true;
}
/**
* ourself and never "fail".
*/
static bool
-do_rewritten_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
- assert_dt
+do_rewritten_blit(struct fd_context *ctx,
+ const struct pipe_blit_info *info) assert_dt
{
- bool success = handle_rgba_blit(ctx, info);
- if (!success)
- success = fd_blitter_blit(ctx, info);
- debug_assert(success); /* fallback should never fail! */
- return success;
+ bool success = handle_rgba_blit(ctx, info);
+ if (!success)
+ success = fd_blitter_blit(ctx, info);
+ debug_assert(success); /* fallback should never fail! */
+ return success;
}
/**
* blit into an equivalent format that we can handle
*/
static bool
-handle_zs_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
- assert_dt
+handle_zs_blit(struct fd_context *ctx,
+ const struct pipe_blit_info *info) assert_dt
{
- struct pipe_blit_info blit = *info;
-
- if (DEBUG_BLIT) {
- fprintf(stderr, "---- handle_zs_blit: ");
- dump_blit_info(info);
- }
-
- struct fd_resource *src = fd_resource(info->src.resource);
- struct fd_resource *dst = fd_resource(info->dst.resource);
-
- switch (info->dst.format) {
- case PIPE_FORMAT_S8_UINT:
- debug_assert(info->mask == PIPE_MASK_S);
- blit.mask = PIPE_MASK_R;
- blit.src.format = PIPE_FORMAT_R8_UINT;
- blit.dst.format = PIPE_FORMAT_R8_UINT;
- return do_rewritten_blit(ctx, &blit);
-
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- if (info->mask & PIPE_MASK_Z) {
- blit.mask = PIPE_MASK_R;
- blit.src.format = PIPE_FORMAT_R32_FLOAT;
- blit.dst.format = PIPE_FORMAT_R32_FLOAT;
- do_rewritten_blit(ctx, &blit);
- }
-
- if (info->mask & PIPE_MASK_S) {
- blit.mask = PIPE_MASK_R;
- blit.src.format = PIPE_FORMAT_R8_UINT;
- blit.dst.format = PIPE_FORMAT_R8_UINT;
- blit.src.resource = &src->stencil->b.b;
- blit.dst.resource = &dst->stencil->b.b;
- do_rewritten_blit(ctx, &blit);
- }
-
- return true;
-
- case PIPE_FORMAT_Z16_UNORM:
- blit.mask = PIPE_MASK_R;
- blit.src.format = PIPE_FORMAT_R16_UNORM;
- blit.dst.format = PIPE_FORMAT_R16_UNORM;
- return do_rewritten_blit(ctx, &blit);
-
- case PIPE_FORMAT_Z32_UNORM:
- case PIPE_FORMAT_Z32_FLOAT:
- debug_assert(info->mask == PIPE_MASK_Z);
- blit.mask = PIPE_MASK_R;
- blit.src.format = PIPE_FORMAT_R32_UINT;
- blit.dst.format = PIPE_FORMAT_R32_UINT;
- return do_rewritten_blit(ctx, &blit);
-
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- blit.mask = 0;
- if (info->mask & PIPE_MASK_Z)
- blit.mask |= PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B;
- if (info->mask & PIPE_MASK_S)
- blit.mask |= PIPE_MASK_A;
- blit.src.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
- blit.dst.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
- /* non-UBWC Z24_UNORM_S8_UINT_AS_R8G8B8A8 is broken on a630, fall back to
- * 8888_unorm.
- */
- if (!ctx->screen->info.a6xx.has_z24uint_s8uint) {
- if (!src->layout.ubwc)
- blit.src.format = PIPE_FORMAT_RGBA8888_UNORM;
- if (!dst->layout.ubwc)
- blit.dst.format = PIPE_FORMAT_RGBA8888_UNORM;
- }
- return fd_blitter_blit(ctx, &blit);
-
- default:
- return false;
- }
+ struct pipe_blit_info blit = *info;
+
+ if (DEBUG_BLIT) {
+ fprintf(stderr, "---- handle_zs_blit: ");
+ dump_blit_info(info);
+ }
+
+ struct fd_resource *src = fd_resource(info->src.resource);
+ struct fd_resource *dst = fd_resource(info->dst.resource);
+
+ switch (info->dst.format) {
+ case PIPE_FORMAT_S8_UINT:
+ debug_assert(info->mask == PIPE_MASK_S);
+ blit.mask = PIPE_MASK_R;
+ blit.src.format = PIPE_FORMAT_R8_UINT;
+ blit.dst.format = PIPE_FORMAT_R8_UINT;
+ return do_rewritten_blit(ctx, &blit);
+
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ if (info->mask & PIPE_MASK_Z) {
+ blit.mask = PIPE_MASK_R;
+ blit.src.format = PIPE_FORMAT_R32_FLOAT;
+ blit.dst.format = PIPE_FORMAT_R32_FLOAT;
+ do_rewritten_blit(ctx, &blit);
+ }
+
+ if (info->mask & PIPE_MASK_S) {
+ blit.mask = PIPE_MASK_R;
+ blit.src.format = PIPE_FORMAT_R8_UINT;
+ blit.dst.format = PIPE_FORMAT_R8_UINT;
+ blit.src.resource = &src->stencil->b.b;
+ blit.dst.resource = &dst->stencil->b.b;
+ do_rewritten_blit(ctx, &blit);
+ }
+
+ return true;
+
+ case PIPE_FORMAT_Z16_UNORM:
+ blit.mask = PIPE_MASK_R;
+ blit.src.format = PIPE_FORMAT_R16_UNORM;
+ blit.dst.format = PIPE_FORMAT_R16_UNORM;
+ return do_rewritten_blit(ctx, &blit);
+
+ case PIPE_FORMAT_Z32_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
+ debug_assert(info->mask == PIPE_MASK_Z);
+ blit.mask = PIPE_MASK_R;
+ blit.src.format = PIPE_FORMAT_R32_UINT;
+ blit.dst.format = PIPE_FORMAT_R32_UINT;
+ return do_rewritten_blit(ctx, &blit);
+
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ blit.mask = 0;
+ if (info->mask & PIPE_MASK_Z)
+ blit.mask |= PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B;
+ if (info->mask & PIPE_MASK_S)
+ blit.mask |= PIPE_MASK_A;
+ blit.src.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+ blit.dst.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+ /* non-UBWC Z24_UNORM_S8_UINT_AS_R8G8B8A8 is broken on a630, fall back to
+ * 8888_unorm.
+ */
+ if (!ctx->screen->info.a6xx.has_z24uint_s8uint) {
+ if (!src->layout.ubwc)
+ blit.src.format = PIPE_FORMAT_RGBA8888_UNORM;
+ if (!dst->layout.ubwc)
+ blit.dst.format = PIPE_FORMAT_RGBA8888_UNORM;
+ }
+ return fd_blitter_blit(ctx, &blit);
+
+ default:
+ return false;
+ }
}
static bool
-handle_compressed_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
- assert_dt
+handle_compressed_blit(struct fd_context *ctx,
+ const struct pipe_blit_info *info) assert_dt
{
- struct pipe_blit_info blit = *info;
+ struct pipe_blit_info blit = *info;
- if (DEBUG_BLIT) {
- fprintf(stderr, "---- handle_compressed_blit: ");
- dump_blit_info(info);
- }
+ if (DEBUG_BLIT) {
+ fprintf(stderr, "---- handle_compressed_blit: ");
+ dump_blit_info(info);
+ }
- if (info->src.format != info->dst.format)
- return fd_blitter_blit(ctx, info);
+ if (info->src.format != info->dst.format)
+ return fd_blitter_blit(ctx, info);
- if (util_format_get_blocksize(info->src.format) == 8) {
- blit.src.format = blit.dst.format = PIPE_FORMAT_R16G16B16A16_UINT;
- } else {
- debug_assert(util_format_get_blocksize(info->src.format) == 16);
- blit.src.format = blit.dst.format = PIPE_FORMAT_R32G32B32A32_UINT;
- }
+ if (util_format_get_blocksize(info->src.format) == 8) {
+ blit.src.format = blit.dst.format = PIPE_FORMAT_R16G16B16A16_UINT;
+ } else {
+ debug_assert(util_format_get_blocksize(info->src.format) == 16);
+ blit.src.format = blit.dst.format = PIPE_FORMAT_R32G32B32A32_UINT;
+ }
- int bw = util_format_get_blockwidth(info->src.format);
- int bh = util_format_get_blockheight(info->src.format);
+ int bw = util_format_get_blockwidth(info->src.format);
+ int bh = util_format_get_blockheight(info->src.format);
- /* NOTE: x/y *must* be aligned to block boundary (ie. in
- * glCompressedTexSubImage2D()) but width/height may not
- * be:
- */
+ /* NOTE: x/y *must* be aligned to block boundary (ie. in
+ * glCompressedTexSubImage2D()) but width/height may not
+ * be:
+ */
- debug_assert((blit.src.box.x % bw) == 0);
- debug_assert((blit.src.box.y % bh) == 0);
+ debug_assert((blit.src.box.x % bw) == 0);
+ debug_assert((blit.src.box.y % bh) == 0);
- blit.src.box.x /= bw;
- blit.src.box.y /= bh;
- blit.src.box.width = DIV_ROUND_UP(blit.src.box.width, bw);
- blit.src.box.height = DIV_ROUND_UP(blit.src.box.height, bh);
+ blit.src.box.x /= bw;
+ blit.src.box.y /= bh;
+ blit.src.box.width = DIV_ROUND_UP(blit.src.box.width, bw);
+ blit.src.box.height = DIV_ROUND_UP(blit.src.box.height, bh);
- debug_assert((blit.dst.box.x % bw) == 0);
- debug_assert((blit.dst.box.y % bh) == 0);
+ debug_assert((blit.dst.box.x % bw) == 0);
+ debug_assert((blit.dst.box.y % bh) == 0);
- blit.dst.box.x /= bw;
- blit.dst.box.y /= bh;
- blit.dst.box.width = DIV_ROUND_UP(blit.dst.box.width, bw);
- blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh);
+ blit.dst.box.x /= bw;
+ blit.dst.box.y /= bh;
+ blit.dst.box.width = DIV_ROUND_UP(blit.dst.box.width, bw);
+ blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh);
- return do_rewritten_blit(ctx, &blit);
+ return do_rewritten_blit(ctx, &blit);
}
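
A worked example of the box rewrite above, assuming a hypothetical 4x4-block compressed format: block-aligned x/y divide exactly, while a ragged width/height rounds up:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int
main(void)
{
   int bw = 4, bh = 4;              /* block size (assumed) */
   int x = 8, y = 4, w = 13, h = 7; /* texel-space box */

   x /= bw;                 /* 2 */
   y /= bh;                 /* 1 */
   w = DIV_ROUND_UP(w, bw); /* 4 */
   h = DIV_ROUND_UP(h, bh); /* 2 */

   printf("block-space box: %d,%d %dx%d\n", x, y, w, h);
   return 0;
}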
static bool
-fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
- assert_dt
+fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt
{
- if (info->mask & PIPE_MASK_ZS)
- return handle_zs_blit(ctx, info);
- if (util_format_is_compressed(info->src.format) ||
- util_format_is_compressed(info->dst.format))
- return handle_compressed_blit(ctx, info);
+ if (info->mask & PIPE_MASK_ZS)
+ return handle_zs_blit(ctx, info);
+ if (util_format_is_compressed(info->src.format) ||
+ util_format_is_compressed(info->dst.format))
+ return handle_compressed_blit(ctx, info);
- return handle_rgba_blit(ctx, info);
+ return handle_rgba_blit(ctx, info);
}
void
-fd6_blitter_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd6_blitter_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- fd_context(pctx)->clear_ubwc = fd6_clear_ubwc;
+ fd_context(pctx)->clear_ubwc = fd6_clear_ubwc;
- if (FD_DBG(NOBLIT))
- return;
+ if (FD_DBG(NOBLIT))
+ return;
- fd_context(pctx)->blit = fd6_blit;
+ fd_context(pctx)->blit = fd6_blit;
}
unsigned
fd6_tile_mode(const struct pipe_resource *tmpl)
{
- /* if the mipmap level 0 is still too small to be tiled, then don't
- * bother pretending:
- */
- if (fd_resource_level_linear(tmpl, 0))
- return TILE6_LINEAR;
-
- /* basically just has to be a format we can blit, so uploads/downloads
- * via linear staging buffer works:
- */
- if (ok_format(tmpl->format))
- return TILE6_3;
-
- return TILE6_LINEAR;
+ /* if the mipmap level 0 is still too small to be tiled, then don't
+ * bother pretending:
+ */
+ if (fd_resource_level_linear(tmpl, 0))
+ return TILE6_LINEAR;
+
+ /* basically just has to be a format we can blit, so uploads/downloads
+ * via linear staging buffer works:
+ */
+ if (ok_format(tmpl->format))
+ return TILE6_3;
+
+ return TILE6_LINEAR;
}
* instead of CP_EVENT_WRITE::BLITs
*/
-void fd6_clear_surface(struct fd_context *ctx,
- struct fd_ringbuffer *ring, struct pipe_surface *psurf,
- uint32_t width, uint32_t height, union pipe_color_union *color) assert_dt;
+void fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ struct pipe_surface *psurf, uint32_t width,
+ uint32_t height,
+ union pipe_color_union *color) assert_dt;
void fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
- uint32_t base, struct pipe_surface *psurf) assert_dt;
+ uint32_t base, struct pipe_surface *psurf) assert_dt;
#endif /* FD6_BLIT_H_ */
#include "fd6_emit.h"
#include "fd6_pack.h"
-
/* maybe move to fd6_program? */
static void
cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct ir3_shader_variant *v) assert_dt
+ struct ir3_shader_variant *v) assert_dt
{
- const struct ir3_info *i = &v->info;
- enum a6xx_threadsize thrsz = i->double_threadsize ? THREAD128 : THREAD64;
-
- OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
- .vs_state = true,
- .hs_state = true,
- .ds_state = true,
- .gs_state = true,
- .fs_state = true,
- .cs_state = true,
- .gfx_ibo = true,
- .cs_ibo = true,
- ));
-
- OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
- OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) |
- A6XX_HLSQ_CS_CNTL_ENABLED);
-
- OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2);
- OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED |
- A6XX_SP_CS_CONFIG_NIBO(v->shader->nir->info.num_ssbos +
- v->shader->nir->info.num_images) |
- A6XX_SP_CS_CONFIG_NTEX(v->num_samp) |
- A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */
- OUT_RING(ring, v->instrlen); /* SP_VS_INSTRLEN */
-
- OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG0, 1);
- OUT_RING(ring, A6XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
- A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
- A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
- COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack));
-
- uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
- OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
- OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
- A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
-
- uint32_t local_invocation_id, work_group_id;
- local_invocation_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
- work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORK_GROUP_ID);
-
- OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL_0, 2);
- OUT_RING(ring, A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
- A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
- A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
- A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
- OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
- A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz));
-
- OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2);
- OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */
-
- if (v->instrlen > 0)
- fd6_emit_shader(ctx, ring, v);
+ const struct ir3_info *i = &v->info;
+ enum a6xx_threadsize thrsz = i->double_threadsize ? THREAD128 : THREAD64;
+
+ OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
+ .ds_state = true, .gs_state = true,
+ .fs_state = true, .cs_state = true,
+ .gfx_ibo = true, .cs_ibo = true, ));
+
+ OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
+ OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) |
+ A6XX_HLSQ_CS_CNTL_ENABLED);
+
+ OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2);
+ OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED |
+ A6XX_SP_CS_CONFIG_NIBO(v->shader->nir->info.num_ssbos +
+ v->shader->nir->info.num_images) |
+ A6XX_SP_CS_CONFIG_NTEX(v->num_samp) |
+ A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */
+ OUT_RING(ring, v->instrlen); /* SP_VS_INSTRLEN */
+
+ OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG0, 1);
+ OUT_RING(ring,
+ A6XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
+ A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
+ A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
+ COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
+ A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack));
+
+ uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
+ OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
+ OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
+ A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
+
+ uint32_t local_invocation_id, work_group_id;
+ local_invocation_id =
+ ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
+ work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORK_GROUP_ID);
+
+ OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL_0, 2);
+ OUT_RING(ring, A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
+ A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
+ A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
+ A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
+ OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
+ A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz));
+
+ OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2);
+ OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */
+
+ if (v->instrlen > 0)
+ fd6_emit_shader(ctx, ring, v);
}
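
In isolation, the shared-size expression above reduces to roughly ceil(bytes/1024) - 1, clamped to at least 1 (the meaning of the field itself is part of the UNKNOWN_A9B1 register, so this only illustrates the arithmetic):

#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int
main(void)
{
   for (int bytes = 0; bytes <= 4096; bytes += 2048) {
      int field = MAX2((bytes - 1) / 1024, 1);
      printf("%4d bytes -> field %d\n", bytes, field);
   }
   return 0; /* prints fields 1, 1, 3 */
}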
static void
-fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
- in_dt
+fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
{
- struct ir3_shader_key key = {};
- struct ir3_shader_variant *v;
- struct fd_ringbuffer *ring = ctx->batch->draw;
- unsigned nglobal = 0;
-
- v = ir3_shader_variant(ir3_get_shader(ctx->compute), key, false, &ctx->debug);
- if (!v)
- return;
-
- if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)
- cs_program_emit(ctx, ring, v);
-
- fd6_emit_cs_state(ctx, ring, v);
- fd6_emit_cs_consts(v, ring, ctx, info);
-
- u_foreach_bit(i, ctx->global_bindings.enabled_mask)
- nglobal++;
-
- if (nglobal > 0) {
- /* global resources don't otherwise get an OUT_RELOC(), since
- * the raw ptr address is emitted in ir3_emit_cs_consts().
- * So to make the kernel aware that these buffers are referenced
- * by the batch, emit dummy reloc's as part of a no-op packet
- * payload:
- */
- OUT_PKT7(ring, CP_NOP, 2 * nglobal);
- u_foreach_bit(i, ctx->global_bindings.enabled_mask) {
- struct pipe_resource *prsc = ctx->global_bindings.buf[i];
- OUT_RELOC(ring, fd_resource(prsc)->bo, 0, 0, 0);
- }
- }
-
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE));
-
- const unsigned *local_size = info->block; // v->shader->nir->info->cs.local_size;
- const unsigned *num_groups = info->grid;
- /* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
- const unsigned work_dim = info->work_dim ? info->work_dim : 3;
- OUT_PKT4(ring, REG_A6XX_HLSQ_CS_NDRANGE_0, 7);
- OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) |
- A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) |
- A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) |
- A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1));
- OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0]));
- OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */
- OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1]));
- OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */
- OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2]));
- OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */
-
- OUT_PKT4(ring, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3);
- OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */
- OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
- OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
-
- trace_grid_info(&ctx->batch->trace, info);
- trace_start_compute(&ctx->batch->trace);
-
- if (info->indirect) {
- struct fd_resource *rsc = fd_resource(info->indirect);
-
- OUT_PKT7(ring, CP_EXEC_CS_INDIRECT, 4);
- OUT_RING(ring, 0x00000000);
- OUT_RELOC(ring, rsc->bo, info->indirect_offset, 0, 0); /* ADDR_LO/HI */
- OUT_RING(ring, A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
- A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
- A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
- } else {
- OUT_PKT7(ring, CP_EXEC_CS, 4);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(info->grid[0]));
- OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(info->grid[1]));
- OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(info->grid[2]));
- }
-
- trace_end_compute(&ctx->batch->trace);
-
- OUT_WFI5(ring);
-
- fd6_cache_flush(ctx->batch, ring);
+ struct ir3_shader_key key = {};
+ struct ir3_shader_variant *v;
+ struct fd_ringbuffer *ring = ctx->batch->draw;
+ unsigned nglobal = 0;
+
+ v =
+ ir3_shader_variant(ir3_get_shader(ctx->compute), key, false, &ctx->debug);
+ if (!v)
+ return;
+
+ if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)
+ cs_program_emit(ctx, ring, v);
+
+ fd6_emit_cs_state(ctx, ring, v);
+ fd6_emit_cs_consts(v, ring, ctx, info);
+
+ u_foreach_bit (i, ctx->global_bindings.enabled_mask)
+ nglobal++;
+
+ if (nglobal > 0) {
+ /* global resources don't otherwise get an OUT_RELOC(), since
+ * the raw ptr address is emitted in ir3_emit_cs_consts().
+ * So to make the kernel aware that these buffers are referenced
+ * by the batch, emit dummy reloc's as part of a no-op packet
+ * payload:
+ */
+ OUT_PKT7(ring, CP_NOP, 2 * nglobal);
+ u_foreach_bit (i, ctx->global_bindings.enabled_mask) {
+ struct pipe_resource *prsc = ctx->global_bindings.buf[i];
+ OUT_RELOC(ring, fd_resource(prsc)->bo, 0, 0, 0);
+ }
+ }
+
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE));
+
+ const unsigned *local_size =
+ info->block; // v->shader->nir->info->cs.local_size;
+ const unsigned *num_groups = info->grid;
+ /* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
+ const unsigned work_dim = info->work_dim ? info->work_dim : 3;
+ OUT_PKT4(ring, REG_A6XX_HLSQ_CS_NDRANGE_0, 7);
+ OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) |
+ A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) |
+ A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) |
+ A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1));
+ OUT_RING(ring,
+ A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0]));
+ OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */
+ OUT_RING(ring,
+ A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1]));
+ OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */
+ OUT_RING(ring,
+ A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2]));
+ OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */
+
+ OUT_PKT4(ring, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3);
+ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */
+ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
+ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
+
+ trace_grid_info(&ctx->batch->trace, info);
+ trace_start_compute(&ctx->batch->trace);
+
+ if (info->indirect) {
+ struct fd_resource *rsc = fd_resource(info->indirect);
+
+ OUT_PKT7(ring, CP_EXEC_CS_INDIRECT, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RELOC(ring, rsc->bo, info->indirect_offset, 0, 0); /* ADDR_LO/HI */
+ OUT_RING(ring,
+ A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
+ A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
+ A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
+ } else {
+ OUT_PKT7(ring, CP_EXEC_CS, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(info->grid[0]));
+ OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(info->grid[1]));
+ OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(info->grid[2]));
+ }
+
+ trace_end_compute(&ctx->batch->trace);
+
+ OUT_WFI5(ring);
+
+ fd6_cache_flush(ctx->batch, ring);
}
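
The ND-range registers above take local sizes minus one and global sizes in invocations rather than workgroups. A sketch of that arithmetic with made-up dimensions:

#include <stdio.h>

int
main(void)
{
   const unsigned local_size[3] = {8, 8, 1}; /* info->block (illustrative) */
   const unsigned num_groups[3] = {4, 2, 1}; /* info->grid */

   for (int i = 0; i < 3; i++) {
      printf("dim %d: LOCALSIZE=%u GLOBALSIZE=%u\n", i, local_size[i] - 1,
             local_size[i] * num_groups[i]);
   }
   return 0;
}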
void
-fd6_compute_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd6_compute_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->launch_grid = fd6_launch_grid;
- pctx->create_compute_state = ir3_shader_compute_state_create;
- pctx->delete_compute_state = ir3_shader_state_delete;
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->launch_grid = fd6_launch_grid;
+ pctx->create_compute_state = ir3_shader_compute_state_create;
+ pctx->delete_compute_state = ir3_shader_state_delete;
}
#include "fd6_pack.h"
#define emit_const_user fd6_emit_const_user
-#define emit_const_bo fd6_emit_const_bo
+#define emit_const_bo fd6_emit_const_bo
#include "ir3_const.h"
/* regid: base const register
*/
void
fd6_emit_const_user(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t regid,
- uint32_t sizedwords, const uint32_t *dwords)
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t sizedwords, const uint32_t *dwords)
{
- emit_const_asserts(ring, v, regid, sizedwords);
-
- /* NOTE we cheat a bit here, since we know mesa is aligning
- * the size of the user buffer to 16 bytes. And we want to
- * cut cycles in a hot path.
- */
- uint32_t align_sz = align(sizedwords, 4);
-
- if (fd6_geom_stage(v->type)) {
- OUT_PKTBUF(ring, CP_LOAD_STATE6_GEOM, dwords, align_sz,
- CP_LOAD_STATE6_0(
- .dst_off = regid/4,
- .state_type = ST6_CONSTANTS,
- .state_src = SS6_DIRECT,
- .state_block = fd6_stage2shadersb(v->type),
- .num_unit = DIV_ROUND_UP(sizedwords, 4)
- ),
- CP_LOAD_STATE6_1(),
- CP_LOAD_STATE6_2()
- );
- } else {
- OUT_PKTBUF(ring, CP_LOAD_STATE6_FRAG, dwords, align_sz,
- CP_LOAD_STATE6_0(
- .dst_off = regid/4,
- .state_type = ST6_CONSTANTS,
- .state_src = SS6_DIRECT,
- .state_block = fd6_stage2shadersb(v->type),
- .num_unit = DIV_ROUND_UP(sizedwords, 4)
- ),
- CP_LOAD_STATE6_1(),
- CP_LOAD_STATE6_2()
- );
- }
+ emit_const_asserts(ring, v, regid, sizedwords);
+
+ /* NOTE we cheat a bit here, since we know mesa is aligning
+ * the size of the user buffer to 16 bytes. And we want to
+ * cut cycles in a hot path.
+ */
+ uint32_t align_sz = align(sizedwords, 4);
+
+ if (fd6_geom_stage(v->type)) {
+ OUT_PKTBUF(
+ ring, CP_LOAD_STATE6_GEOM, dwords, align_sz,
+ CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
+ .state_src = SS6_DIRECT,
+ .state_block = fd6_stage2shadersb(v->type),
+ .num_unit = DIV_ROUND_UP(sizedwords, 4)),
+ CP_LOAD_STATE6_1(), CP_LOAD_STATE6_2());
+ } else {
+ OUT_PKTBUF(
+ ring, CP_LOAD_STATE6_FRAG, dwords, align_sz,
+ CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
+ .state_src = SS6_DIRECT,
+ .state_block = fd6_stage2shadersb(v->type),
+ .num_unit = DIV_ROUND_UP(sizedwords, 4)),
+ CP_LOAD_STATE6_1(), CP_LOAD_STATE6_2());
+ }
}
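
The sizing above pads the copied payload to whole vec4s (4 dwords) while num_unit counts vec4s. A sketch using local stand-ins for mesa's align() and DIV_ROUND_UP():

#include <stdio.h>

#define ALIGN_POT(v, a) (((v) + (a) - 1) & ~((a) - 1))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int
main(void)
{
   unsigned sizedwords = 10;                        /* illustrative */
   unsigned align_sz = ALIGN_POT(sizedwords, 4);    /* 12 dwords copied */
   unsigned num_unit = DIV_ROUND_UP(sizedwords, 4); /* 3 vec4s loaded */

   printf("align_sz=%u num_unit=%u\n", align_sz, num_unit);
   return 0;
}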
void
fd6_emit_const_bo(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t regid,
- uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
- uint32_t dst_off = regid / 4;
- assert(dst_off % 4 == 0);
- uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4);
- assert(num_unit % 4 == 0);
-
- emit_const_asserts(ring, v, regid, sizedwords);
-
- if (fd6_geom_stage(v->type)) {
- OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
- CP_LOAD_STATE6_0(
- .dst_off = dst_off,
- .state_type = ST6_CONSTANTS,
- .state_src = SS6_INDIRECT,
- .state_block = fd6_stage2shadersb(v->type),
- .num_unit = num_unit,
- ),
- CP_LOAD_STATE6_EXT_SRC_ADDR(
- .bo = bo,
- .bo_offset = offset
- )
- );
- } else {
- OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
- CP_LOAD_STATE6_0(
- .dst_off = dst_off,
- .state_type = ST6_CONSTANTS,
- .state_src = SS6_INDIRECT,
- .state_block = fd6_stage2shadersb(v->type),
- .num_unit = num_unit,
- ),
- CP_LOAD_STATE6_EXT_SRC_ADDR(
- .bo = bo,
- .bo_offset = offset
- )
- );
- }
+ uint32_t dst_off = regid / 4;
+ assert(dst_off % 4 == 0);
+ uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4);
+ assert(num_unit % 4 == 0);
+
+ emit_const_asserts(ring, v, regid, sizedwords);
+
+ if (fd6_geom_stage(v->type)) {
+ OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
+ CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
+ .state_src = SS6_INDIRECT,
+ .state_block = fd6_stage2shadersb(v->type),
+ .num_unit = num_unit, ),
+ CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
+ } else {
+ OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
+ CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
+ .state_src = SS6_INDIRECT,
+ .state_block = fd6_stage2shadersb(v->type),
+ .num_unit = num_unit, ),
+ CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
+ }
}
static bool
is_stateobj(struct fd_ringbuffer *ring)
{
- return true;
+ return true;
}
static void
-emit_const_ptrs(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t dst_offset,
- uint32_t num, struct fd_bo **bos, uint32_t *offsets)
+emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
+ uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
+ uint32_t *offsets)
{
- unreachable("shouldn't be called on a6xx");
+ unreachable("shouldn't be called on a6xx");
}
static void
-emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3_shader_variant *s)
- assert_dt
+emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit,
+ struct ir3_shader_variant *s) assert_dt
{
- struct fd_context *ctx = emit->ctx;
- const struct ir3_const_state *const_state = ir3_const_state(s);
- const unsigned regid = const_state->offsets.primitive_param * 4 + 4;
- uint32_t dwords = 16;
-
- OUT_PKT7(ring, fd6_stage2opcode(s->type), 3);
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) |
- CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)|
- CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) |
- CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4));
- OUT_RB(ring, ctx->batch->tess_addrs_constobj);
+ struct fd_context *ctx = emit->ctx;
+ const struct ir3_const_state *const_state = ir3_const_state(s);
+ const unsigned regid = const_state->offsets.primitive_param * 4 + 4;
+ uint32_t dwords = 16;
+
+ OUT_PKT7(ring, fd6_stage2opcode(s->type), 3);
+ OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) |
+ CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4));
+ OUT_RB(ring, ctx->batch->tess_addrs_constobj);
}
static void
emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v,
- uint32_t *params, int num_params)
+ uint32_t *params, int num_params)
{
- const struct ir3_const_state *const_state = ir3_const_state(v);
- const unsigned regid = const_state->offsets.primitive_param;
- int size = MIN2(1 + regid, v->constlen) - regid;
- if (size > 0)
- fd6_emit_const_user(ring, v, regid * 4, num_params, params);
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ const unsigned regid = const_state->offsets.primitive_param;
+ int size = MIN2(1 + regid, v->constlen) - regid;
+ if (size > 0)
+ fd6_emit_const_user(ring, v, regid * 4, num_params, params);
}
struct fd_ringbuffer *
fd6_build_tess_consts(struct fd6_emit *emit)
{
- struct fd_context *ctx = emit->ctx;
-
- struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
- ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
-
- /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
- * size is dwords, since that's what LDG/STG use.
- */
- unsigned num_vertices =
- emit->hs ?
- emit->info->vertices_per_patch :
- emit->gs->shader->nir->info.gs.vertices_in;
-
- uint32_t vs_params[4] = {
- emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
- emit->vs->output_size * 4, /* vs vertex stride */
- 0,
- 0
- };
-
- emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params));
-
- if (emit->hs) {
- uint32_t hs_params[4] = {
- emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
- emit->vs->output_size * 4, /* vs vertex stride */
- emit->hs->output_size,
- emit->info->vertices_per_patch
- };
-
- emit_stage_tess_consts(constobj, emit->hs, hs_params, ARRAY_SIZE(hs_params));
- emit_tess_bos(constobj, emit, emit->hs);
-
- if (emit->gs)
- num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
-
- uint32_t ds_params[4] = {
- emit->ds->output_size * num_vertices * 4, /* ds primitive stride */
- emit->ds->output_size * 4, /* ds vertex stride */
- emit->hs->output_size, /* hs vertex stride (dwords) */
- emit->hs->shader->nir->info.tess.tcs_vertices_out
- };
-
- emit_stage_tess_consts(constobj, emit->ds, ds_params, ARRAY_SIZE(ds_params));
- emit_tess_bos(constobj, emit, emit->ds);
- }
-
- if (emit->gs) {
- struct ir3_shader_variant *prev;
- if (emit->ds)
- prev = emit->ds;
- else
- prev = emit->vs;
-
- uint32_t gs_params[4] = {
- prev->output_size * num_vertices * 4, /* ds primitive stride */
- prev->output_size * 4, /* ds vertex stride */
- 0,
- 0,
- };
-
- num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
- emit_stage_tess_consts(constobj, emit->gs, gs_params, ARRAY_SIZE(gs_params));
- }
-
- return constobj;
+ struct fd_context *ctx = emit->ctx;
+
+ struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
+ ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
+
+ /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
+ * size is dwords, since that's what LDG/STG use.
+ */
+ unsigned num_vertices = emit->hs
+ ? emit->info->vertices_per_patch
+ : emit->gs->shader->nir->info.gs.vertices_in;
+
+ uint32_t vs_params[4] = {
+ emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
+ emit->vs->output_size * 4, /* vs vertex stride */
+ 0, 0};
+
+ emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params));
+
+ if (emit->hs) {
+ uint32_t hs_params[4] = {
+ emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
+ emit->vs->output_size * 4, /* vs vertex stride */
+ emit->hs->output_size, emit->info->vertices_per_patch};
+
+ emit_stage_tess_consts(constobj, emit->hs, hs_params,
+ ARRAY_SIZE(hs_params));
+ emit_tess_bos(constobj, emit, emit->hs);
+
+ if (emit->gs)
+ num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
+
+ uint32_t ds_params[4] = {
+ emit->ds->output_size * num_vertices * 4, /* ds primitive stride */
+ emit->ds->output_size * 4, /* ds vertex stride */
+ emit->hs->output_size, /* hs vertex stride (dwords) */
+ emit->hs->shader->nir->info.tess.tcs_vertices_out};
+
+ emit_stage_tess_consts(constobj, emit->ds, ds_params,
+ ARRAY_SIZE(ds_params));
+ emit_tess_bos(constobj, emit, emit->ds);
+ }
+
+ if (emit->gs) {
+ struct ir3_shader_variant *prev;
+ if (emit->ds)
+ prev = emit->ds;
+ else
+ prev = emit->vs;
+
+ uint32_t gs_params[4] = {
+ prev->output_size * num_vertices * 4, /* ds primitive stride */
+ prev->output_size * 4, /* ds vertex stride */
+ 0,
+ 0,
+ };
+
+ num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
+ emit_stage_tess_consts(constobj, emit->gs, gs_params,
+ ARRAY_SIZE(gs_params));
+ }
+
+ return constobj;
}
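
The stride math above multiplies output_size by 4, which suggests output_size counts dwords; a sketch of the vs_params computation under that assumption, with illustrative sizes:

#include <stdio.h>

int
main(void)
{
   unsigned output_size = 8;  /* dwords per vertex (assumed unit) */
   unsigned num_vertices = 3; /* vertices per patch */

   unsigned prim_stride = output_size * num_vertices * 4; /* 96 bytes */
   unsigned vtx_stride = output_size * 4;                 /* 32 bytes */

   printf("prim stride=%u vertex stride=%u\n", prim_stride, vtx_stride);
   return 0;
}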
static void
fd6_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
- struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
+ struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
- const struct ir3_const_state *const_state = ir3_const_state(v);
- int num_ubos = const_state->num_ubos;
-
- if (!num_ubos)
- return;
-
- OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
- CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO)|
- CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) |
- CP_LOAD_STATE6_0_NUM_UNIT(num_ubos));
- OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-
- for (int i = 0; i < num_ubos; i++) {
- /* NIR constant data is packed into the end of the shader. */
- if (i == const_state->constant_data_ubo) {
- int size_vec4s = DIV_ROUND_UP(v->constant_data_size, 16);
- OUT_RELOC(ring, v->bo,
- v->info.constant_data_offset,
- (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32,
- 0);
- continue;
- }
-
- struct pipe_constant_buffer *cb = &constbuf->cb[i];
-
- /* If we have user pointers (constbuf 0, aka GL uniforms), upload them
- * to a buffer now, and save it in the constbuf so that we don't have
- * to reupload until they get changed.
- */
- if (cb->user_buffer) {
- struct pipe_context *pctx = &ctx->base;
- u_upload_data(pctx->stream_uploader, 0,
- cb->buffer_size,
- 64,
- cb->user_buffer,
- &cb->buffer_offset, &cb->buffer);
- cb->user_buffer = NULL;
- }
-
- if (cb->buffer) {
- int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16);
- OUT_RELOC(ring, fd_resource(cb->buffer)->bo,
- cb->buffer_offset,
- (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32,
- 0);
- } else {
- OUT_RING(ring, 0xbad00000 | (i << 16));
- OUT_RING(ring, A6XX_UBO_1_SIZE(0));
- }
- }
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ int num_ubos = const_state->num_ubos;
+
+ if (!num_ubos)
+ return;
+
+ OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
+ OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) |
+ CP_LOAD_STATE6_0_NUM_UNIT(num_ubos));
+ OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+
+ for (int i = 0; i < num_ubos; i++) {
+ /* NIR constant data is packed into the end of the shader. */
+ if (i == const_state->constant_data_ubo) {
+ int size_vec4s = DIV_ROUND_UP(v->constant_data_size, 16);
+ OUT_RELOC(ring, v->bo, v->info.constant_data_offset,
+ (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
+ continue;
+ }
+
+ struct pipe_constant_buffer *cb = &constbuf->cb[i];
+
+ /* If we have user pointers (constbuf 0, aka GL uniforms), upload them
+ * to a buffer now, and save it in the constbuf so that we don't have
+ * to reupload until they get changed.
+ */
+ if (cb->user_buffer) {
+ struct pipe_context *pctx = &ctx->base;
+ u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64,
+ cb->user_buffer, &cb->buffer_offset, &cb->buffer);
+ cb->user_buffer = NULL;
+ }
+
+ if (cb->buffer) {
+ int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16);
+ OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset,
+ (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
+ } else {
+ OUT_RING(ring, 0xbad00000 | (i << 16));
+ OUT_RING(ring, A6XX_UBO_1_SIZE(0));
+ }
+ }
}
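
A6XX_UBO_1_SIZE above is encoded in vec4 (16-byte) units, which is why every buffer size goes through DIV_ROUND_UP(..., 16). A small self-contained check of that conversion, with the macro re-stated from util/macros.h:

#include <assert.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   assert(DIV_ROUND_UP(0, 16) == 0);
   assert(DIV_ROUND_UP(1, 16) == 1);    /* a partial vec4 still occupies one */
   assert(DIV_ROUND_UP(256, 16) == 16); /* 256 bytes == 16 vec4s */
   return 0;
}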
static unsigned
user_consts_cmdstream_size(struct ir3_shader_variant *v)
{
- struct ir3_const_state *const_state = ir3_const_state(v);
- struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;
+ struct ir3_const_state *const_state = ir3_const_state(v);
+ struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;
- if (unlikely(!ubo_state->cmdstream_size)) {
- unsigned packets, size;
+ if (unlikely(!ubo_state->cmdstream_size)) {
+ unsigned packets, size;
- /* pre-calculate size required for userconst stateobj: */
- ir3_user_consts_size(ubo_state, &packets, &size);
+ /* pre-calculate size required for userconst stateobj: */
+ ir3_user_consts_size(ubo_state, &packets, &size);
- /* also account for UBO addresses: */
- packets += 1;
- size += 2 * const_state->num_ubos;
+ /* also account for UBO addresses: */
+ packets += 1;
+ size += 2 * const_state->num_ubos;
- unsigned sizedwords = (4 * packets) + size;
- ubo_state->cmdstream_size = sizedwords * 4;
- }
+ unsigned sizedwords = (4 * packets) + size;
+ ubo_state->cmdstream_size = sizedwords * 4;
+ }
- return ubo_state->cmdstream_size;
+ return ubo_state->cmdstream_size;
}
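
The byte count above falls out of: four header dwords per CP packet (the 4 * packets term), the constant payload itself, and one extra packet whose payload is a two-dword address per UBO. A worked example under those assumptions (the packets/size split is whatever ir3_user_consts_size() reports; the values here are hypothetical):

#include <stdio.h>

int main(void)
{
   unsigned packets = 2, size = 24; /* assumed ir3_user_consts_size() output */
   unsigned num_ubos = 3;

   packets += 1;         /* one more packet for the UBO addresses */
   size += 2 * num_ubos; /* a 64-bit address is 2 dwords per UBO */

   unsigned sizedwords = (4 * packets) + size;
   printf("cmdstream: %u dwords, %u bytes\n", sizedwords, sizedwords * 4);
   return 0;
}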
struct fd_ringbuffer *
fd6_build_user_consts(struct fd6_emit *emit)
{
- static const enum pipe_shader_type types[] = {
- PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL,
- PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT,
- };
- struct ir3_shader_variant *variants[] = {
- emit->vs, emit->hs, emit->ds, emit->gs, emit->fs,
- };
- struct fd_context *ctx = emit->ctx;
- unsigned sz = 0;
-
- for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
- if (!variants[i])
- continue;
- sz += user_consts_cmdstream_size(variants[i]);
- }
-
- struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
- ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
-
- for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
- if (!variants[i])
- continue;
- ir3_emit_user_consts(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]);
- fd6_emit_ubos(ctx, variants[i], constobj, &ctx->constbuf[types[i]]);
- }
-
- return constobj;
+ static const enum pipe_shader_type types[] = {
+ PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL,
+ PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT,
+ };
+ struct ir3_shader_variant *variants[] = {
+ emit->vs, emit->hs, emit->ds, emit->gs, emit->fs,
+ };
+ struct fd_context *ctx = emit->ctx;
+ unsigned sz = 0;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
+ if (!variants[i])
+ continue;
+ sz += user_consts_cmdstream_size(variants[i]);
+ }
+
+ struct fd_ringbuffer *constobj =
+ fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
+ if (!variants[i])
+ continue;
+ ir3_emit_user_consts(ctx->screen, variants[i], constobj,
+ &ctx->constbuf[types[i]]);
+ fd6_emit_ubos(ctx, variants[i], constobj, &ctx->constbuf[types[i]]);
+ }
+
+ return constobj;
}
struct fd_ringbuffer *
fd6_build_vs_driver_params(struct fd6_emit *emit)
{
- struct fd_context *ctx = emit->ctx;
- struct fd6_context *fd6_ctx = fd6_context(ctx);
- const struct ir3_shader_variant *vs = emit->vs;
-
- if (vs->need_driver_params) {
- struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
- ctx->batch->submit, IR3_DP_VS_COUNT * 4, FD_RINGBUFFER_STREAMING);
- ir3_emit_vs_driver_params(vs, dpconstobj, ctx, emit->info, emit->indirect, emit->draw);
- fd6_ctx->has_dp_state = true;
- return dpconstobj;
- }
-
- fd6_ctx->has_dp_state = false;
- return NULL;
+ struct fd_context *ctx = emit->ctx;
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ const struct ir3_shader_variant *vs = emit->vs;
+
+ if (vs->need_driver_params) {
+ struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
+ ctx->batch->submit, IR3_DP_VS_COUNT * 4, FD_RINGBUFFER_STREAMING);
+ ir3_emit_vs_driver_params(vs, dpconstobj, ctx, emit->info, emit->indirect,
+ emit->draw);
+ fd6_ctx->has_dp_state = true;
+ return dpconstobj;
+ }
+
+ fd6_ctx->has_dp_state = false;
+ return NULL;
}
void
fd6_emit_ibo_consts(struct fd6_emit *emit, const struct ir3_shader_variant *v,
- enum pipe_shader_type stage, struct fd_ringbuffer *ring)
+ enum pipe_shader_type stage, struct fd_ringbuffer *ring)
{
- struct fd_context *ctx = emit->ctx;
+ struct fd_context *ctx = emit->ctx;
- ir3_emit_ssbo_sizes(ctx->screen, v, ring, &ctx->shaderbuf[stage]);
- ir3_emit_image_dims(ctx->screen, v, ring, &ctx->shaderimg[stage]);
+ ir3_emit_ssbo_sizes(ctx->screen, v, ring, &ctx->shaderbuf[stage]);
+ ir3_emit_image_dims(ctx->screen, v, ring, &ctx->shaderimg[stage]);
}
void
-fd6_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
- struct fd_context *ctx, const struct pipe_grid_info *info)
+fd6_emit_cs_consts(const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring, struct fd_context *ctx,
+ const struct pipe_grid_info *info)
{
- ir3_emit_cs_consts(v, ring, ctx, info);
- fd6_emit_ubos(ctx, v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
+ ir3_emit_cs_consts(v, ring, ctx, info);
+ fd6_emit_ubos(ctx, v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
}
void
-fd6_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v,
- struct fd_ringbuffer *ring)
+fd6_emit_immediates(struct fd_screen *screen,
+ const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring)
{
- ir3_emit_immediates(screen, v, ring);
+ ir3_emit_immediates(screen, v, ring);
}
void
fd6_emit_link_map(struct fd_screen *screen,
- const struct ir3_shader_variant *producer,
- const struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
+ const struct ir3_shader_variant *producer,
+ const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring)
{
- ir3_emit_link_map(screen, producer, v, ring);
+ ir3_emit_link_map(screen, producer, v, ring);
}
* SOFTWARE.
*/
-
#ifndef FD6_CONST_H
#define FD6_CONST_H
#include "fd6_emit.h"
-struct fd_ringbuffer * fd6_build_tess_consts(struct fd6_emit *emit) assert_dt;
-struct fd_ringbuffer * fd6_build_user_consts(struct fd6_emit *emit) assert_dt;
-struct fd_ringbuffer * fd6_build_vs_driver_params(struct fd6_emit *emit) assert_dt;
+struct fd_ringbuffer *fd6_build_tess_consts(struct fd6_emit *emit) assert_dt;
+struct fd_ringbuffer *fd6_build_user_consts(struct fd6_emit *emit) assert_dt;
+struct fd_ringbuffer *
+fd6_build_vs_driver_params(struct fd6_emit *emit) assert_dt;
-void fd6_emit_ibo_consts(struct fd6_emit *emit, const struct ir3_shader_variant *v,
- enum pipe_shader_type stage, struct fd_ringbuffer *ring) assert_dt;
-void fd6_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
- struct fd_context *ctx, const struct pipe_grid_info *info) assert_dt;
-void fd6_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v,
- struct fd_ringbuffer *ring) assert_dt;
+void fd6_emit_ibo_consts(struct fd6_emit *emit,
+ const struct ir3_shader_variant *v,
+ enum pipe_shader_type stage,
+ struct fd_ringbuffer *ring) assert_dt;
+void fd6_emit_cs_consts(const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring, struct fd_context *ctx,
+ const struct pipe_grid_info *info) assert_dt;
+void fd6_emit_immediates(struct fd_screen *screen,
+ const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring) assert_dt;
void fd6_emit_link_map(struct fd_screen *screen,
- const struct ir3_shader_variant *producer,
- const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) assert_dt;
+ const struct ir3_shader_variant *producer,
+ const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring) assert_dt;
#endif /* FD6_CONST_H */
#include "freedreno_query_acc.h"
-#include "fd6_context.h"
-#include "fd6_compute.h"
#include "fd6_blend.h"
#include "fd6_blitter.h"
+#include "fd6_compute.h"
+#include "fd6_context.h"
#include "fd6_draw.h"
#include "fd6_emit.h"
#include "fd6_gmem.h"
#include "fd6_zsa.h"
static void
-fd6_context_destroy(struct pipe_context *pctx)
- in_dt
+fd6_context_destroy(struct pipe_context *pctx) in_dt
{
- struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
+ struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
- u_upload_destroy(fd6_ctx->border_color_uploader);
- pipe_resource_reference(&fd6_ctx->border_color_buf, NULL);
+ u_upload_destroy(fd6_ctx->border_color_uploader);
+ pipe_resource_reference(&fd6_ctx->border_color_buf, NULL);
- fd_context_destroy(pctx);
+ fd_context_destroy(pctx);
- if (fd6_ctx->vsc_draw_strm)
- fd_bo_del(fd6_ctx->vsc_draw_strm);
- if (fd6_ctx->vsc_prim_strm)
- fd_bo_del(fd6_ctx->vsc_prim_strm);
- fd_bo_del(fd6_ctx->control_mem);
+ if (fd6_ctx->vsc_draw_strm)
+ fd_bo_del(fd6_ctx->vsc_draw_strm);
+ if (fd6_ctx->vsc_prim_strm)
+ fd_bo_del(fd6_ctx->vsc_prim_strm);
+ fd_bo_del(fd6_ctx->control_mem);
- fd_context_cleanup_common_vbos(&fd6_ctx->base);
+ fd_context_cleanup_common_vbos(&fd6_ctx->base);
- fd6_texture_fini(pctx);
+ fd6_texture_fini(pctx);
- free(fd6_ctx);
+ free(fd6_ctx);
}
/* clang-format off */
static void *
fd6_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
- const struct pipe_vertex_element *elements)
+ const struct pipe_vertex_element *elements)
{
- struct fd_context *ctx = fd_context(pctx);
-
- struct fd6_vertex_stateobj *state = CALLOC_STRUCT(fd6_vertex_stateobj);
- memcpy(state->base.pipe, elements, sizeof(*elements) * num_elements);
- state->base.num_elements = num_elements;
- state->stateobj =
- fd_ringbuffer_new_object(ctx->pipe, 4 * (num_elements * 2 + 1));
- struct fd_ringbuffer *ring = state->stateobj;
-
- OUT_PKT4(ring, REG_A6XX_VFD_DECODE(0), 2 * num_elements);
- for (int32_t i = 0; i < num_elements; i++) {
- const struct pipe_vertex_element *elem = &elements[i];
- enum pipe_format pfmt = elem->src_format;
- enum a6xx_format fmt = fd6_pipe2vtx(pfmt);
- bool isint = util_format_is_pure_integer(pfmt);
- debug_assert(fmt != FMT6_NONE);
-
- OUT_RING(ring, A6XX_VFD_DECODE_INSTR_IDX(elem->vertex_buffer_index) |
- A6XX_VFD_DECODE_INSTR_OFFSET(elem->src_offset) |
- A6XX_VFD_DECODE_INSTR_FORMAT(fmt) |
- COND(elem->instance_divisor, A6XX_VFD_DECODE_INSTR_INSTANCED) |
- A6XX_VFD_DECODE_INSTR_SWAP(fd6_pipe2swap(pfmt)) |
- A6XX_VFD_DECODE_INSTR_UNK30 |
- COND(!isint, A6XX_VFD_DECODE_INSTR_FLOAT));
- OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[i].STEP_RATE */
- }
-
- return state;
+ struct fd_context *ctx = fd_context(pctx);
+
+ struct fd6_vertex_stateobj *state = CALLOC_STRUCT(fd6_vertex_stateobj);
+ memcpy(state->base.pipe, elements, sizeof(*elements) * num_elements);
+ state->base.num_elements = num_elements;
+ state->stateobj =
+ fd_ringbuffer_new_object(ctx->pipe, 4 * (num_elements * 2 + 1));
+ struct fd_ringbuffer *ring = state->stateobj;
+
+ OUT_PKT4(ring, REG_A6XX_VFD_DECODE(0), 2 * num_elements);
+ for (int32_t i = 0; i < num_elements; i++) {
+ const struct pipe_vertex_element *elem = &elements[i];
+ enum pipe_format pfmt = elem->src_format;
+ enum a6xx_format fmt = fd6_pipe2vtx(pfmt);
+ bool isint = util_format_is_pure_integer(pfmt);
+ debug_assert(fmt != FMT6_NONE);
+
+ OUT_RING(ring, A6XX_VFD_DECODE_INSTR_IDX(elem->vertex_buffer_index) |
+ A6XX_VFD_DECODE_INSTR_OFFSET(elem->src_offset) |
+ A6XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+ COND(elem->instance_divisor,
+ A6XX_VFD_DECODE_INSTR_INSTANCED) |
+ A6XX_VFD_DECODE_INSTR_SWAP(fd6_pipe2swap(pfmt)) |
+ A6XX_VFD_DECODE_INSTR_UNK30 |
+ COND(!isint, A6XX_VFD_DECODE_INSTR_FLOAT));
+ OUT_RING(ring,
+ MAX2(1, elem->instance_divisor)); /* VFD_DECODE[i].STEP_RATE */
+ }
+
+ return state;
}
static void
fd6_vertex_state_delete(struct pipe_context *pctx, void *hwcso)
{
- struct fd6_vertex_stateobj *so = hwcso;
+ struct fd6_vertex_stateobj *so = hwcso;
- fd_ringbuffer_del(so->stateobj);
- FREE(hwcso);
+ fd_ringbuffer_del(so->stateobj);
+ FREE(hwcso);
}
static void
setup_state_map(struct fd_context *ctx)
{
- STATIC_ASSERT(FD6_GROUP_NON_GROUP < 32);
-
- fd_context_add_map(ctx, FD_DIRTY_VTXSTATE, BIT(FD6_GROUP_VTXSTATE));
- fd_context_add_map(ctx, FD_DIRTY_VTXBUF, BIT(FD6_GROUP_VBO));
- fd_context_add_map(ctx, FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER, BIT(FD6_GROUP_ZSA));
- fd_context_add_map(ctx, FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG,
- BIT(FD6_GROUP_LRZ) | BIT(FD6_GROUP_LRZ_BINNING));
- fd_context_add_map(ctx, FD_DIRTY_PROG, BIT(FD6_GROUP_PROG));
- fd_context_add_map(ctx, FD_DIRTY_RASTERIZER, BIT(FD6_GROUP_RASTERIZER));
- fd_context_add_map(ctx, FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD |
- FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL,
- BIT(FD6_GROUP_PROG_FB_RAST));
- fd_context_add_map(ctx, FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK, BIT(FD6_GROUP_BLEND));
- fd_context_add_map(ctx, FD_DIRTY_BLEND_COLOR, BIT(FD6_GROUP_BLEND_COLOR));
- fd_context_add_map(ctx, FD_DIRTY_SSBO | FD_DIRTY_IMAGE | FD_DIRTY_PROG,
- BIT(FD6_GROUP_IBO));
- fd_context_add_map(ctx, FD_DIRTY_PROG, BIT(FD6_GROUP_VS_TEX) | BIT(FD6_GROUP_HS_TEX) |
- BIT(FD6_GROUP_DS_TEX) | BIT(FD6_GROUP_GS_TEX) | BIT(FD6_GROUP_FS_TEX));
- fd_context_add_map(ctx, FD_DIRTY_PROG | FD_DIRTY_CONST, BIT(FD6_GROUP_CONST));
- fd_context_add_map(ctx, FD_DIRTY_STREAMOUT, BIT(FD6_GROUP_SO));
-
- fd_context_add_shader_map(ctx, PIPE_SHADER_VERTEX, FD_DIRTY_SHADER_TEX,
- BIT(FD6_GROUP_VS_TEX));
- fd_context_add_shader_map(ctx, PIPE_SHADER_TESS_CTRL, FD_DIRTY_SHADER_TEX,
- BIT(FD6_GROUP_HS_TEX));
- fd_context_add_shader_map(ctx, PIPE_SHADER_TESS_EVAL, FD_DIRTY_SHADER_TEX,
- BIT(FD6_GROUP_DS_TEX));
- fd_context_add_shader_map(ctx, PIPE_SHADER_GEOMETRY, FD_DIRTY_SHADER_TEX,
- BIT(FD6_GROUP_GS_TEX));
- fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT, FD_DIRTY_SHADER_TEX,
- BIT(FD6_GROUP_FS_TEX));
-
- /* NOTE: scissor enabled bit is part of rasterizer state, but
- * fd_rasterizer_state_bind() will mark scissor dirty if needed:
- */
- fd_context_add_map(ctx, FD_DIRTY_SCISSOR, BIT(FD6_GROUP_SCISSOR));
-
- /* Stuff still emitted in IB2
- *
- * NOTE: viewport state doesn't seem to change frequently, so possibly
- * move it into FD6_GROUP_RASTERIZER?
- */
- fd_context_add_map(ctx, FD_DIRTY_STENCIL_REF | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER,
- BIT(FD6_GROUP_NON_GROUP));
+ STATIC_ASSERT(FD6_GROUP_NON_GROUP < 32);
+
+ fd_context_add_map(ctx, FD_DIRTY_VTXSTATE, BIT(FD6_GROUP_VTXSTATE));
+ fd_context_add_map(ctx, FD_DIRTY_VTXBUF, BIT(FD6_GROUP_VBO));
+ fd_context_add_map(ctx, FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER,
+ BIT(FD6_GROUP_ZSA));
+ fd_context_add_map(ctx, FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG,
+ BIT(FD6_GROUP_LRZ) | BIT(FD6_GROUP_LRZ_BINNING));
+ fd_context_add_map(ctx, FD_DIRTY_PROG, BIT(FD6_GROUP_PROG));
+ fd_context_add_map(ctx, FD_DIRTY_RASTERIZER, BIT(FD6_GROUP_RASTERIZER));
+ fd_context_add_map(ctx,
+ FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD |
+ FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL,
+ BIT(FD6_GROUP_PROG_FB_RAST));
+ fd_context_add_map(ctx, FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK,
+ BIT(FD6_GROUP_BLEND));
+ fd_context_add_map(ctx, FD_DIRTY_BLEND_COLOR, BIT(FD6_GROUP_BLEND_COLOR));
+ fd_context_add_map(ctx, FD_DIRTY_SSBO | FD_DIRTY_IMAGE | FD_DIRTY_PROG,
+ BIT(FD6_GROUP_IBO));
+ fd_context_add_map(ctx, FD_DIRTY_PROG,
+ BIT(FD6_GROUP_VS_TEX) | BIT(FD6_GROUP_HS_TEX) |
+ BIT(FD6_GROUP_DS_TEX) | BIT(FD6_GROUP_GS_TEX) |
+ BIT(FD6_GROUP_FS_TEX));
+ fd_context_add_map(ctx, FD_DIRTY_PROG | FD_DIRTY_CONST,
+ BIT(FD6_GROUP_CONST));
+ fd_context_add_map(ctx, FD_DIRTY_STREAMOUT, BIT(FD6_GROUP_SO));
+
+ fd_context_add_shader_map(ctx, PIPE_SHADER_VERTEX, FD_DIRTY_SHADER_TEX,
+ BIT(FD6_GROUP_VS_TEX));
+ fd_context_add_shader_map(ctx, PIPE_SHADER_TESS_CTRL, FD_DIRTY_SHADER_TEX,
+ BIT(FD6_GROUP_HS_TEX));
+ fd_context_add_shader_map(ctx, PIPE_SHADER_TESS_EVAL, FD_DIRTY_SHADER_TEX,
+ BIT(FD6_GROUP_DS_TEX));
+ fd_context_add_shader_map(ctx, PIPE_SHADER_GEOMETRY, FD_DIRTY_SHADER_TEX,
+ BIT(FD6_GROUP_GS_TEX));
+ fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT, FD_DIRTY_SHADER_TEX,
+ BIT(FD6_GROUP_FS_TEX));
+
+ /* NOTE: scissor enabled bit is part of rasterizer state, but
+ * fd_rasterizer_state_bind() will mark scissor dirty if needed:
+ */
+ fd_context_add_map(ctx, FD_DIRTY_SCISSOR, BIT(FD6_GROUP_SCISSOR));
+
+ /* Stuff still emitted in IB2

+ *
+ * NOTE: viewport state doesn't seem to change frequently, so possibly
+ * move it into FD6_GROUP_RASTERIZER?
+ */
+ fd_context_add_map(
+ ctx, FD_DIRTY_STENCIL_REF | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER,
+ BIT(FD6_GROUP_NON_GROUP));
}
struct pipe_context *
-fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
- disable_thread_safety_analysis
+fd6_context_create(struct pipe_screen *pscreen, void *priv,
+ unsigned flags) disable_thread_safety_analysis
{
- struct fd_screen *screen = fd_screen(pscreen);
- struct fd6_context *fd6_ctx = CALLOC_STRUCT(fd6_context);
- struct pipe_context *pctx;
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd6_context *fd6_ctx = CALLOC_STRUCT(fd6_context);
+ struct pipe_context *pctx;
- if (!fd6_ctx)
- return NULL;
+ if (!fd6_ctx)
+ return NULL;
- pctx = &fd6_ctx->base.base;
- pctx->screen = pscreen;
+ pctx = &fd6_ctx->base.base;
+ pctx->screen = pscreen;
- fd6_ctx->base.dev = fd_device_ref(screen->dev);
- fd6_ctx->base.screen = fd_screen(pscreen);
- fd6_ctx->base.last.key = &fd6_ctx->last_key;
+ fd6_ctx->base.dev = fd_device_ref(screen->dev);
+ fd6_ctx->base.screen = fd_screen(pscreen);
+ fd6_ctx->base.last.key = &fd6_ctx->last_key;
- pctx->destroy = fd6_context_destroy;
- pctx->create_blend_state = fd6_blend_state_create;
- pctx->create_rasterizer_state = fd6_rasterizer_state_create;
- pctx->create_depth_stencil_alpha_state = fd6_zsa_state_create;
- pctx->create_vertex_elements_state = fd6_vertex_state_create;
+ pctx->destroy = fd6_context_destroy;
+ pctx->create_blend_state = fd6_blend_state_create;
+ pctx->create_rasterizer_state = fd6_rasterizer_state_create;
+ pctx->create_depth_stencil_alpha_state = fd6_zsa_state_create;
+ pctx->create_vertex_elements_state = fd6_vertex_state_create;
- fd6_draw_init(pctx);
- fd6_compute_init(pctx);
- fd6_gmem_init(pctx);
- fd6_texture_init(pctx);
- fd6_prog_init(pctx);
- fd6_emit_init(pctx);
- fd6_query_context_init(pctx);
+ fd6_draw_init(pctx);
+ fd6_compute_init(pctx);
+ fd6_gmem_init(pctx);
+ fd6_texture_init(pctx);
+ fd6_prog_init(pctx);
+ fd6_emit_init(pctx);
+ fd6_query_context_init(pctx);
- setup_state_map(&fd6_ctx->base);
+ setup_state_map(&fd6_ctx->base);
- pctx = fd_context_init(&fd6_ctx->base, pscreen, primtypes, priv, flags);
- if (!pctx)
- return NULL;
+ pctx = fd_context_init(&fd6_ctx->base, pscreen, primtypes, priv, flags);
+ if (!pctx)
+ return NULL;
- /* after fd_context_init() to override set_shader_images() */
- fd6_image_init(pctx);
+ /* after fd_context_init() to override set_shader_images() */
+ fd6_image_init(pctx);
- util_blitter_set_texture_multisample(fd6_ctx->base.blitter, true);
+ util_blitter_set_texture_multisample(fd6_ctx->base.blitter, true);
- pctx->delete_vertex_elements_state = fd6_vertex_state_delete;
+ pctx->delete_vertex_elements_state = fd6_vertex_state_delete;
- /* fd_context_init overwrites delete_rasterizer_state, so set this
- * here. */
- pctx->delete_rasterizer_state = fd6_rasterizer_state_delete;
- pctx->delete_blend_state = fd6_blend_state_delete;
- pctx->delete_depth_stencil_alpha_state = fd6_zsa_state_delete;
+ /* fd_context_init overwrites delete_rasterizer_state, so set this
+ * here. */
+ pctx->delete_rasterizer_state = fd6_rasterizer_state_delete;
+ pctx->delete_blend_state = fd6_blend_state_delete;
+ pctx->delete_depth_stencil_alpha_state = fd6_zsa_state_delete;
- /* initial sizes for VSC buffers (or rather the per-pipe sizes
- * which are used to derive the entire buffer size:
- */
- fd6_ctx->vsc_draw_strm_pitch = 0x440;
- fd6_ctx->vsc_prim_strm_pitch = 0x1040;
+ /* initial sizes for VSC buffers (or rather the per-pipe sizes
+ * which are used to derive the entire buffer size:
+ */
+ fd6_ctx->vsc_draw_strm_pitch = 0x440;
+ fd6_ctx->vsc_prim_strm_pitch = 0x1040;
- fd6_ctx->control_mem = fd_bo_new(screen->dev, 0x1000,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "control");
+ fd6_ctx->control_mem =
+ fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "control");
- memset(fd_bo_map(fd6_ctx->control_mem), 0,
- sizeof(struct fd6_control));
+ memset(fd_bo_map(fd6_ctx->control_mem), 0, sizeof(struct fd6_control));
- fd_context_setup_common_vbos(&fd6_ctx->base);
+ fd_context_setup_common_vbos(&fd6_ctx->base);
- fd6_blitter_init(pctx);
+ fd6_blitter_init(pctx);
- fd6_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
- PIPE_USAGE_STREAM, 0);
+ fd6_ctx->border_color_uploader =
+ u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
- return fd_context_init_tc(pctx, flags);
+ return fd_context_init_tc(pctx, flags);
}
#include "a6xx.xml.h"
struct fd6_lrz_state {
- bool enable : 1;
- bool write : 1;
- bool test : 1;
- enum fd_lrz_direction direction : 2;
+ bool enable : 1;
+ bool write : 1;
+ bool test : 1;
+ enum fd_lrz_direction direction : 2;
- /* this comes from the fs program state, rather than zsa: */
- enum a6xx_ztest_mode z_mode : 2;
+ /* this comes from the fs program state, rather than zsa: */
+ enum a6xx_ztest_mode z_mode : 2;
};
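
The five bitfields above total 7 bits, which keeps the lrz[2] (binning + draw) copies in fd6_context cheap to compare and copy. An illustrative mirror using portable bitfield types; the packed size is compiler-dependent, so only an upper bound is asserted:

#include <assert.h>
#include <stdbool.h>

struct lrz_bits {
   bool enable : 1;
   bool write : 1;
   bool test : 1;
   unsigned direction : 2; /* stand-in for enum fd_lrz_direction */
   unsigned z_mode : 2;    /* stand-in for enum a6xx_ztest_mode */
};

int main(void)
{
   static_assert(sizeof(struct lrz_bits) <= sizeof(int),
                 "LRZ state should pack into a single word");
   return 0;
}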
struct fd6_context {
- struct fd_context base;
+ struct fd_context base;
- /* Two buffers related to hw binning / visibility stream (VSC).
- * Compared to previous generations
- * (1) we cannot specify individual buffers per VSC, instead
- * just a pitch and base address
- * (2) there is a second smaller buffer.. we also stash
- * VSC_BIN_SIZE at the end of the 2nd buffer.
- */
- struct fd_bo *vsc_draw_strm, *vsc_prim_strm;
+ /* Two buffers related to hw binning / visibility stream (VSC).
+ * Compared to previous generations
+ * (1) we cannot specify individual buffers per VSC, instead
+ * just a pitch and base address
+ * (2) there is a second smaller buffer.. we also stash
+ * VSC_BIN_SIZE at the end of the 2nd buffer.
+ */
+ struct fd_bo *vsc_draw_strm, *vsc_prim_strm;
- unsigned vsc_draw_strm_pitch, vsc_prim_strm_pitch;
+ unsigned vsc_draw_strm_pitch, vsc_prim_strm_pitch;
- /* The 'control' mem BO is used for various housekeeping
- * functions. See 'struct fd6_control'
- */
- struct fd_bo *control_mem;
- uint32_t seqno;
+ /* The 'control' mem BO is used for various housekeeping
+ * functions. See 'struct fd6_control'
+ */
+ struct fd_bo *control_mem;
+ uint32_t seqno;
- struct u_upload_mgr *border_color_uploader;
- struct pipe_resource *border_color_buf;
+ struct u_upload_mgr *border_color_uploader;
+ struct pipe_resource *border_color_buf;
- /* storage for ctx->last.key: */
- struct ir3_shader_key last_key;
+ /* storage for ctx->last.key: */
+ struct ir3_shader_key last_key;
- /* Is there current VS driver-param state set? */
- bool has_dp_state;
+ /* Is there current VS driver-param state set? */
+ bool has_dp_state;
- /* number of active samples-passed queries: */
- int samples_passed_queries;
+ /* number of active samples-passed queries: */
+ int samples_passed_queries;
- /* cached stateobjs to avoid hashtable lookup when not dirty: */
- const struct fd6_program_state *prog;
+ /* cached stateobjs to avoid hashtable lookup when not dirty: */
+ const struct fd6_program_state *prog;
- uint16_t tex_seqno;
- struct hash_table *tex_cache;
+ uint16_t tex_seqno;
+ struct hash_table *tex_cache;
- struct {
- /* previous binning/draw lrz state, which is a function of multiple
- * gallium stateobjs, but doesn't necessarily change as frequently:
- */
- struct fd6_lrz_state lrz[2];
- } last;
+ struct {
+ /* previous binning/draw lrz state, which is a function of multiple
+ * gallium stateobjs, but doesn't necessarily change as frequently:
+ */
+ struct fd6_lrz_state lrz[2];
+ } last;
};
static inline struct fd6_context *
fd6_context(struct fd_context *ctx)
{
- return (struct fd6_context *)ctx;
+ return (struct fd6_context *)ctx;
}
-struct pipe_context *
-fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
-
+struct pipe_context *fd6_context_create(struct pipe_screen *pscreen, void *priv,
+ unsigned flags);
/* This struct defines the layout of the fd6_context::control buffer: */
struct fd6_control {
- uint32_t seqno; /* seqno for async CP_EVENT_WRITE, etc */
- uint32_t _pad0;
- volatile uint32_t vsc_overflow;
- uint32_t _pad1[5];
-
- /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, starts on a 32-byte boundary. */
- struct {
- uint32_t offset;
- uint32_t pad[7];
- } flush_base[4];
+ uint32_t seqno; /* seqno for async CP_EVENT_WRITE, etc */
+ uint32_t _pad0;
+ volatile uint32_t vsc_overflow;
+ uint32_t _pad1[5];
+
+ /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, starts on a 32-byte boundary. */
+ struct {
+ uint32_t offset;
+ uint32_t pad[7];
+ } flush_base[4];
};
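
The two pad arrays are what make flush_base[] land on a 32-byte boundary with exactly 32 bytes per entry (one offset dword plus seven pad dwords). A standalone mirror of the layout that checks exactly that:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct control_mirror {
   uint32_t seqno;
   uint32_t _pad0;
   volatile uint32_t vsc_overflow;
   uint32_t _pad1[5];
   struct {
      uint32_t offset;
      uint32_t pad[7];
   } flush_base[4];
};

int main(void)
{
   /* 4 + 4 + 4 + 20 bytes of leading fields == 32 */
   static_assert(offsetof(struct control_mirror, flush_base) == 32,
                 "flush_base must start on a 32-byte boundary");
   static_assert(sizeof(((struct control_mirror *)0)->flush_base[0]) == 32,
                 "each flush_base entry spans 32 bytes");
   return 0;
}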
-#define control_ptr(fd6_ctx, member) \
- (fd6_ctx)->control_mem, offsetof(struct fd6_control, member), 0, 0
-
+#define control_ptr(fd6_ctx, member) \
+ (fd6_ctx)->control_mem, offsetof(struct fd6_control, member), 0, 0
static inline void
emit_marker6(struct fd_ringbuffer *ring, int scratch_idx)
{
- extern int32_t marker_cnt;
- unsigned reg = REG_A6XX_CP_SCRATCH_REG(scratch_idx);
- if (__EMIT_MARKER) {
- OUT_WFI5(ring);
- OUT_PKT4(ring, reg, 1);
- OUT_RING(ring, p_atomic_inc_return(&marker_cnt));
- }
+ extern int32_t marker_cnt;
+ unsigned reg = REG_A6XX_CP_SCRATCH_REG(scratch_idx);
+ if (__EMIT_MARKER) {
+ OUT_WFI5(ring);
+ OUT_PKT4(ring, reg, 1);
+ OUT_RING(ring, p_atomic_inc_return(&marker_cnt));
+ }
}
struct fd6_vertex_stateobj {
- struct fd_vertex_stateobj base;
- struct fd_ringbuffer *stateobj;
+ struct fd_vertex_stateobj base;
+ struct fd_ringbuffer *stateobj;
};
static inline struct fd6_vertex_stateobj *
fd6_vertex_stateobj(void *p)
{
- return (struct fd6_vertex_stateobj *) p;
+ return (struct fd6_vertex_stateobj *)p;
}
-
#endif /* FD6_CONTEXT_H_ */
*/
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
+#include "util/u_string.h"
-#include "freedreno_state.h"
#include "freedreno_resource.h"
+#include "freedreno_state.h"
-#include "fd6_draw.h"
#include "fd6_context.h"
+#include "fd6_draw.h"
#include "fd6_emit.h"
-#include "fd6_program.h"
#include "fd6_format.h"
+#include "fd6_program.h"
#include "fd6_vsc.h"
#include "fd6_zsa.h"
#include "fd6_pack.h"
static void
-draw_emit_xfb(struct fd_ringbuffer *ring,
- struct CP_DRAW_INDX_OFFSET_0 *draw0,
- const struct pipe_draw_info *info,
+draw_emit_xfb(struct fd_ringbuffer *ring, struct CP_DRAW_INDX_OFFSET_0 *draw0,
+ const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect)
{
- struct fd_stream_output_target *target = fd_stream_output_target(indirect->count_from_stream_output);
- struct fd_resource *offset = fd_resource(target->offset_buf);
-
- /* All known firmware versions do not wait for WFIs with CP_DRAW_AUTO.
- * Plus, for the common case where the counter buffer is written by
- * vkCmdEndTransformFeedback, we need to wait for the CP_WAIT_MEM_WRITES to
- * complete which means we need a WAIT_FOR_ME anyway.
- */
- OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
-
- OUT_PKT7(ring, CP_DRAW_AUTO, 6);
- OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
- OUT_RING(ring, info->instance_count);
- OUT_RELOC(ring, offset->bo, 0, 0, 0);
- OUT_RING(ring, 0); /* byte counter offset subtracted from the value read above */
- OUT_RING(ring, target->stride);
+ struct fd_stream_output_target *target =
+ fd_stream_output_target(indirect->count_from_stream_output);
+ struct fd_resource *offset = fd_resource(target->offset_buf);
+
+ /* All known firmware versions do not wait for WFIs with CP_DRAW_AUTO.
+ * Plus, for the common case where the counter buffer is written by
+ * vkCmdEndTransformFeedback, we need to wait for the CP_WAIT_MEM_WRITES to
+ * complete which means we need a WAIT_FOR_ME anyway.
+ */
+ OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+
+ OUT_PKT7(ring, CP_DRAW_AUTO, 6);
+ OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
+ OUT_RING(ring, info->instance_count);
+ OUT_RELOC(ring, offset->bo, 0, 0, 0);
+ OUT_RING(
+ ring,
+ 0); /* byte counter offset subtracted from the value read above */
+ OUT_RING(ring, target->stride);
}
static void
draw_emit_indirect(struct fd_ringbuffer *ring,
- struct CP_DRAW_INDX_OFFSET_0 *draw0,
- const struct pipe_draw_info *info,
- const struct pipe_draw_indirect_info *indirect,
- unsigned index_offset)
+ struct CP_DRAW_INDX_OFFSET_0 *draw0,
+ const struct pipe_draw_info *info,
+ const struct pipe_draw_indirect_info *indirect,
+ unsigned index_offset)
{
- struct fd_resource *ind = fd_resource(indirect->buffer);
-
- if (info->index_size) {
- struct pipe_resource *idx = info->index.resource;
- unsigned max_indices = (idx->width0 - index_offset) / info->index_size;
-
- OUT_PKT(ring, CP_DRAW_INDX_INDIRECT,
- pack_CP_DRAW_INDX_OFFSET_0(*draw0),
- A5XX_CP_DRAW_INDX_INDIRECT_INDX_BASE(
- fd_resource(idx)->bo, index_offset),
- A5XX_CP_DRAW_INDX_INDIRECT_3(.max_indices = max_indices),
- A5XX_CP_DRAW_INDX_INDIRECT_INDIRECT(
- ind->bo, indirect->offset)
- );
- } else {
- OUT_PKT(ring, CP_DRAW_INDIRECT,
- pack_CP_DRAW_INDX_OFFSET_0(*draw0),
- A5XX_CP_DRAW_INDIRECT_INDIRECT(
- ind->bo, indirect->offset)
- );
- }
+ struct fd_resource *ind = fd_resource(indirect->buffer);
+
+ if (info->index_size) {
+ struct pipe_resource *idx = info->index.resource;
+ unsigned max_indices = (idx->width0 - index_offset) / info->index_size;
+
+ OUT_PKT(ring, CP_DRAW_INDX_INDIRECT, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
+ A5XX_CP_DRAW_INDX_INDIRECT_INDX_BASE(fd_resource(idx)->bo,
+ index_offset),
+ A5XX_CP_DRAW_INDX_INDIRECT_3(.max_indices = max_indices),
+ A5XX_CP_DRAW_INDX_INDIRECT_INDIRECT(ind->bo, indirect->offset));
+ } else {
+ OUT_PKT(ring, CP_DRAW_INDIRECT, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
+ A5XX_CP_DRAW_INDIRECT_INDIRECT(ind->bo, indirect->offset));
+ }
}
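
max_indices above is simply how many whole indices fit in the bound index buffer past index_offset, which bounds the index fetch for the indirect draw. With hypothetical values:

#include <stdio.h>

int main(void)
{
   unsigned width0 = 4096;     /* index buffer size in bytes (assumed) */
   unsigned index_offset = 64; /* byte offset of the first index */
   unsigned index_size = 2;    /* e.g. 16-bit indices */

   unsigned max_indices = (width0 - index_offset) / index_size;
   printf("max_indices = %u\n", max_indices); /* prints 2016 */
   return 0;
}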
static void
-draw_emit(struct fd_ringbuffer *ring,
- struct CP_DRAW_INDX_OFFSET_0 *draw0,
- const struct pipe_draw_info *info,
- const struct pipe_draw_start_count *draw,
- unsigned index_offset)
+draw_emit(struct fd_ringbuffer *ring, struct CP_DRAW_INDX_OFFSET_0 *draw0,
+ const struct pipe_draw_info *info,
+ const struct pipe_draw_start_count *draw, unsigned index_offset)
{
- if (info->index_size) {
- assert(!info->has_user_indices);
-
- struct pipe_resource *idx_buffer = info->index.resource;
- unsigned max_indices = (idx_buffer->width0 - index_offset) / info->index_size;
-
- OUT_PKT(ring, CP_DRAW_INDX_OFFSET,
- pack_CP_DRAW_INDX_OFFSET_0(*draw0),
- CP_DRAW_INDX_OFFSET_1(.num_instances = info->instance_count),
- CP_DRAW_INDX_OFFSET_2(.num_indices = draw->count),
- CP_DRAW_INDX_OFFSET_3(.first_indx = draw->start),
- A5XX_CP_DRAW_INDX_OFFSET_INDX_BASE(
- fd_resource(idx_buffer)->bo, index_offset),
- A5XX_CP_DRAW_INDX_OFFSET_6(.max_indices = max_indices)
- );
- } else {
- OUT_PKT(ring, CP_DRAW_INDX_OFFSET,
- pack_CP_DRAW_INDX_OFFSET_0(*draw0),
- CP_DRAW_INDX_OFFSET_1(.num_instances = info->instance_count),
- CP_DRAW_INDX_OFFSET_2(.num_indices = draw->count)
- );
- }
+ if (info->index_size) {
+ assert(!info->has_user_indices);
+
+ struct pipe_resource *idx_buffer = info->index.resource;
+ unsigned max_indices =
+ (idx_buffer->width0 - index_offset) / info->index_size;
+
+ OUT_PKT(ring, CP_DRAW_INDX_OFFSET, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
+ CP_DRAW_INDX_OFFSET_1(.num_instances = info->instance_count),
+ CP_DRAW_INDX_OFFSET_2(.num_indices = draw->count),
+ CP_DRAW_INDX_OFFSET_3(.first_indx = draw->start),
+ A5XX_CP_DRAW_INDX_OFFSET_INDX_BASE(fd_resource(idx_buffer)->bo,
+ index_offset),
+ A5XX_CP_DRAW_INDX_OFFSET_6(.max_indices = max_indices));
+ } else {
+ OUT_PKT(ring, CP_DRAW_INDX_OFFSET, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
+ CP_DRAW_INDX_OFFSET_1(.num_instances = info->instance_count),
+ CP_DRAW_INDX_OFFSET_2(.num_indices = draw->count));
+ }
}
static void
-fixup_draw_state(struct fd_context *ctx, struct fd6_emit *emit)
- assert_dt
+fixup_draw_state(struct fd_context *ctx, struct fd6_emit *emit) assert_dt
{
- if (ctx->last.dirty ||
- (ctx->last.primitive_restart != emit->primitive_restart)) {
- /* rasterizer state is affected by primitive-restart: */
- fd_context_dirty(ctx, FD_DIRTY_RASTERIZER);
- ctx->last.primitive_restart = emit->primitive_restart;
- }
+ if (ctx->last.dirty ||
+ (ctx->last.primitive_restart != emit->primitive_restart)) {
+ /* rasterizer state is affected by primitive-restart: */
+ fd_context_dirty(ctx, FD_DIRTY_RASTERIZER);
+ ctx->last.primitive_restart = emit->primitive_restart;
+ }
}
static bool
fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw,
- unsigned index_offset)
- assert_dt
+ unsigned index_offset) assert_dt
{
- struct fd6_context *fd6_ctx = fd6_context(ctx);
- struct shader_info *gs_info = ir3_get_shader_info(ctx->prog.gs);
- struct fd6_emit emit = {
- .ctx = ctx,
- .vtx = &ctx->vtx,
- .info = info,
- .indirect = indirect,
- .draw = draw,
- .key = {
- .vs = ctx->prog.vs,
- .gs = ctx->prog.gs,
- .fs = ctx->prog.fs,
- .key = {
- .rasterflat = ctx->rasterizer->flatshade,
- .layer_zero = !gs_info || !(gs_info->outputs_written & VARYING_BIT_LAYER),
- .sample_shading = (ctx->min_samples > 1),
- .msaa = (ctx->framebuffer.samples > 1),
- },
- },
- .rasterflat = ctx->rasterizer->flatshade,
- .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
- .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
- .primitive_restart = info->primitive_restart && info->index_size,
- };
-
- if (!(ctx->prog.vs && ctx->prog.fs))
- return false;
-
- if (info->mode == PIPE_PRIM_PATCHES) {
- emit.key.hs = ctx->prog.hs;
- emit.key.ds = ctx->prog.ds;
-
- if (!(ctx->prog.hs && ctx->prog.ds))
- return false;
-
- struct shader_info *ds_info = ir3_get_shader_info(emit.key.ds);
- emit.key.key.tessellation = ir3_tess_mode(ds_info->tess.primitive_mode);
- ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
- }
-
- if (emit.key.gs) {
- emit.key.key.has_gs = true;
- ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
- }
-
- if (!(emit.key.hs || emit.key.ds || emit.key.gs || indirect))
- fd6_vsc_update_sizes(ctx->batch, info, draw);
-
- ir3_fixup_shader_state(&ctx->base, &emit.key.key);
-
- if (!(ctx->dirty & FD_DIRTY_PROG)) {
- emit.prog = fd6_ctx->prog;
- } else {
- fd6_ctx->prog = fd6_emit_get_prog(&emit);
- }
-
- /* bail if compile failed: */
- if (!fd6_ctx->prog)
- return false;
-
- fixup_draw_state(ctx, &emit);
-
- /* *after* fixup_shader_state(): */
- emit.dirty = ctx->dirty;
- emit.dirty_groups = ctx->gen_dirty;
-
- emit.bs = fd6_emit_get_prog(&emit)->bs;
- emit.vs = fd6_emit_get_prog(&emit)->vs;
- emit.hs = fd6_emit_get_prog(&emit)->hs;
- emit.ds = fd6_emit_get_prog(&emit)->ds;
- emit.gs = fd6_emit_get_prog(&emit)->gs;
- emit.fs = fd6_emit_get_prog(&emit)->fs;
-
- if (emit.vs->need_driver_params || fd6_ctx->has_dp_state)
- emit.dirty_groups |= BIT(FD6_GROUP_VS_DRIVER_PARAMS);
-
- /* If we are doing xfb, we need to emit the xfb state on every draw: */
- if (emit.prog->stream_output)
- emit.dirty_groups |= BIT(FD6_GROUP_SO);
-
- if (unlikely(ctx->stats_users > 0)) {
- ctx->stats.vs_regs += ir3_shader_halfregs(emit.vs);
- ctx->stats.hs_regs += COND(emit.hs, ir3_shader_halfregs(emit.hs));
- ctx->stats.ds_regs += COND(emit.ds, ir3_shader_halfregs(emit.ds));
- ctx->stats.gs_regs += COND(emit.gs, ir3_shader_halfregs(emit.gs));
- ctx->stats.fs_regs += ir3_shader_halfregs(emit.fs);
- }
-
- struct fd_ringbuffer *ring = ctx->batch->draw;
-
- struct CP_DRAW_INDX_OFFSET_0 draw0 = {
- .prim_type = ctx->primtypes[info->mode],
- .vis_cull = USE_VISIBILITY,
- .gs_enable = !!emit.key.gs,
- };
-
- if (indirect && indirect->count_from_stream_output) {
- draw0.source_select= DI_SRC_SEL_AUTO_XFB;
- } else if (info->index_size) {
- draw0.source_select = DI_SRC_SEL_DMA;
- draw0.index_size = fd4_size2indextype(info->index_size);
- } else {
- draw0.source_select = DI_SRC_SEL_AUTO_INDEX;
- }
-
- if (info->mode == PIPE_PRIM_PATCHES) {
- shader_info *ds_info = &emit.ds->shader->nir->info;
- uint32_t factor_stride;
-
- switch (ds_info->tess.primitive_mode) {
- case GL_ISOLINES:
- draw0.patch_type = TESS_ISOLINES;
- factor_stride = 12;
- break;
- case GL_TRIANGLES:
- draw0.patch_type = TESS_TRIANGLES;
- factor_stride = 20;
- break;
- case GL_QUADS:
- draw0.patch_type = TESS_QUADS;
- factor_stride = 28;
- break;
- default:
- unreachable("bad tessmode");
- }
-
- draw0.prim_type = DI_PT_PATCHES0 + info->vertices_per_patch;
- draw0.tess_enable = true;
-
- const unsigned max_count = 2048;
- unsigned count;
-
- /**
- * We can cap tessparam/tessfactor buffer sizes at the sub-draw
- * limit. But in the indirect-draw case we must assume the worst.
- */
- if (indirect && indirect->buffer) {
- count = ALIGN_NPOT(max_count, info->vertices_per_patch);
- } else {
- count = MIN2(max_count, draw->count);
- count = ALIGN_NPOT(count, info->vertices_per_patch);
- }
-
- OUT_PKT7(ring, CP_SET_SUBDRAW_SIZE, 1);
- OUT_RING(ring, count);
-
- ctx->batch->tessellation = true;
- ctx->batch->tessparam_size = MAX2(ctx->batch->tessparam_size,
- emit.hs->output_size * 4 * count);
- ctx->batch->tessfactor_size = MAX2(ctx->batch->tessfactor_size,
- factor_stride * count);
-
- if (!ctx->batch->tess_addrs_constobj) {
- /* Reserve space for the bo addresses - we'll write them later in
- * setup_tess_buffers(). We need 2 bo addresses, but indirect
- * constant upload needs at least 4 vec4s.
- */
- unsigned size = 4 * 16;
-
- ctx->batch->tess_addrs_constobj = fd_submit_new_ringbuffer(
- ctx->batch->submit, size, FD_RINGBUFFER_STREAMING);
-
- ctx->batch->tess_addrs_constobj->cur += size;
- }
- }
-
- uint32_t index_start = info->index_size ? info->index_bias : draw->start;
- if (ctx->last.dirty || (ctx->last.index_start != index_start)) {
- OUT_PKT4(ring, REG_A6XX_VFD_INDEX_OFFSET, 1);
- OUT_RING(ring, index_start); /* VFD_INDEX_OFFSET */
- ctx->last.index_start = index_start;
- }
-
- if (ctx->last.dirty || (ctx->last.instance_start != info->start_instance)) {
- OUT_PKT4(ring, REG_A6XX_VFD_INSTANCE_START_OFFSET, 1);
- OUT_RING(ring, info->start_instance); /* VFD_INSTANCE_START_OFFSET */
- ctx->last.instance_start = info->start_instance;
- }
-
- uint32_t restart_index = info->primitive_restart ? info->restart_index : 0xffffffff;
- if (ctx->last.dirty || (ctx->last.restart_index != restart_index)) {
- OUT_PKT4(ring, REG_A6XX_PC_RESTART_INDEX, 1);
- OUT_RING(ring, restart_index); /* PC_RESTART_INDEX */
- ctx->last.restart_index = restart_index;
- }
-
- // TODO move fd6_emit_streamout.. I think..
- if (emit.dirty_groups)
- fd6_emit_state(ring, &emit);
-
- /* for debug after a lock up, write a unique counter value
- * to scratch7 for each draw, to make it easier to match up
- * register dumps to cmdstream. The combination of IB
- * (scratch6) and DRAW is enough to "triangulate" the
- * particular draw that caused lockup.
- */
- emit_marker6(ring, 7);
-
- if (indirect) {
- if (indirect->count_from_stream_output) {
- draw_emit_xfb(ring, &draw0, info, indirect);
- } else {
- draw_emit_indirect(ring, &draw0, info, indirect, index_offset);
- }
- } else {
- draw_emit(ring, &draw0, info, draw, index_offset);
- }
-
- emit_marker6(ring, 7);
- fd_reset_wfi(ctx->batch);
-
- if (emit.streamout_mask) {
- struct fd_ringbuffer *ring = ctx->batch->draw;
-
- for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
- if (emit.streamout_mask & (1 << i)) {
- fd6_event_write(ctx->batch, ring, FLUSH_SO_0 + i, false);
- }
- }
- }
-
- fd_context_all_clean(ctx);
-
- return true;
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct shader_info *gs_info = ir3_get_shader_info(ctx->prog.gs);
+ struct fd6_emit emit = {
+ .ctx = ctx,
+ .vtx = &ctx->vtx,
+ .info = info,
+ .indirect = indirect,
+ .draw = draw,
+ .key =
+ {
+ .vs = ctx->prog.vs,
+ .gs = ctx->prog.gs,
+ .fs = ctx->prog.fs,
+ .key =
+ {
+ .rasterflat = ctx->rasterizer->flatshade,
+ .layer_zero = !gs_info ||
+ !(gs_info->outputs_written & VARYING_BIT_LAYER),
+ .sample_shading = (ctx->min_samples > 1),
+ .msaa = (ctx->framebuffer.samples > 1),
+ },
+ },
+ .rasterflat = ctx->rasterizer->flatshade,
+ .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
+ .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
+ .primitive_restart = info->primitive_restart && info->index_size,
+ };
+
+ if (!(ctx->prog.vs && ctx->prog.fs))
+ return false;
+
+ if (info->mode == PIPE_PRIM_PATCHES) {
+ emit.key.hs = ctx->prog.hs;
+ emit.key.ds = ctx->prog.ds;
+
+ if (!(ctx->prog.hs && ctx->prog.ds))
+ return false;
+
+ struct shader_info *ds_info = ir3_get_shader_info(emit.key.ds);
+ emit.key.key.tessellation = ir3_tess_mode(ds_info->tess.primitive_mode);
+ ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
+ }
+
+ if (emit.key.gs) {
+ emit.key.key.has_gs = true;
+ ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
+ }
+
+ if (!(emit.key.hs || emit.key.ds || emit.key.gs || indirect))
+ fd6_vsc_update_sizes(ctx->batch, info, draw);
+
+ ir3_fixup_shader_state(&ctx->base, &emit.key.key);
+
+ if (!(ctx->dirty & FD_DIRTY_PROG)) {
+ emit.prog = fd6_ctx->prog;
+ } else {
+ fd6_ctx->prog = fd6_emit_get_prog(&emit);
+ }
+
+ /* bail if compile failed: */
+ if (!fd6_ctx->prog)
+ return false;
+
+ fixup_draw_state(ctx, &emit);
+
+ /* *after* fixup_shader_state(): */
+ emit.dirty = ctx->dirty;
+ emit.dirty_groups = ctx->gen_dirty;
+
+ emit.bs = fd6_emit_get_prog(&emit)->bs;
+ emit.vs = fd6_emit_get_prog(&emit)->vs;
+ emit.hs = fd6_emit_get_prog(&emit)->hs;
+ emit.ds = fd6_emit_get_prog(&emit)->ds;
+ emit.gs = fd6_emit_get_prog(&emit)->gs;
+ emit.fs = fd6_emit_get_prog(&emit)->fs;
+
+ if (emit.vs->need_driver_params || fd6_ctx->has_dp_state)
+ emit.dirty_groups |= BIT(FD6_GROUP_VS_DRIVER_PARAMS);
+
+ /* If we are doing xfb, we need to emit the xfb state on every draw: */
+ if (emit.prog->stream_output)
+ emit.dirty_groups |= BIT(FD6_GROUP_SO);
+
+ if (unlikely(ctx->stats_users > 0)) {
+ ctx->stats.vs_regs += ir3_shader_halfregs(emit.vs);
+ ctx->stats.hs_regs += COND(emit.hs, ir3_shader_halfregs(emit.hs));
+ ctx->stats.ds_regs += COND(emit.ds, ir3_shader_halfregs(emit.ds));
+ ctx->stats.gs_regs += COND(emit.gs, ir3_shader_halfregs(emit.gs));
+ ctx->stats.fs_regs += ir3_shader_halfregs(emit.fs);
+ }
+
+ struct fd_ringbuffer *ring = ctx->batch->draw;
+
+ struct CP_DRAW_INDX_OFFSET_0 draw0 = {
+ .prim_type = ctx->primtypes[info->mode],
+ .vis_cull = USE_VISIBILITY,
+ .gs_enable = !!emit.key.gs,
+ };
+
+ if (indirect && indirect->count_from_stream_output) {
+ draw0.source_select = DI_SRC_SEL_AUTO_XFB;
+ } else if (info->index_size) {
+ draw0.source_select = DI_SRC_SEL_DMA;
+ draw0.index_size = fd4_size2indextype(info->index_size);
+ } else {
+ draw0.source_select = DI_SRC_SEL_AUTO_INDEX;
+ }
+
+ if (info->mode == PIPE_PRIM_PATCHES) {
+ shader_info *ds_info = &emit.ds->shader->nir->info;
+ uint32_t factor_stride;
+
+ switch (ds_info->tess.primitive_mode) {
+ case GL_ISOLINES:
+ draw0.patch_type = TESS_ISOLINES;
+ factor_stride = 12;
+ break;
+ case GL_TRIANGLES:
+ draw0.patch_type = TESS_TRIANGLES;
+ factor_stride = 20;
+ break;
+ case GL_QUADS:
+ draw0.patch_type = TESS_QUADS;
+ factor_stride = 28;
+ break;
+ default:
+ unreachable("bad tessmode");
+ }
+
+ draw0.prim_type = DI_PT_PATCHES0 + info->vertices_per_patch;
+ draw0.tess_enable = true;
+
+ const unsigned max_count = 2048;
+ unsigned count;
+
+ /**
+ * We can cap tessparam/tessfactor buffer sizes at the sub-draw
+ * limit. But in the indirect-draw case we must assume the worst.
+ */
+ if (indirect && indirect->buffer) {
+ count = ALIGN_NPOT(max_count, info->vertices_per_patch);
+ } else {
+ count = MIN2(max_count, draw->count);
+ count = ALIGN_NPOT(count, info->vertices_per_patch);
+ }
+
+ OUT_PKT7(ring, CP_SET_SUBDRAW_SIZE, 1);
+ OUT_RING(ring, count);
+
+ ctx->batch->tessellation = true;
+ ctx->batch->tessparam_size =
+ MAX2(ctx->batch->tessparam_size, emit.hs->output_size * 4 * count);
+ ctx->batch->tessfactor_size =
+ MAX2(ctx->batch->tessfactor_size, factor_stride * count);
+
+ if (!ctx->batch->tess_addrs_constobj) {
+ /* Reserve space for the bo addresses - we'll write them later in
+ * setup_tess_buffers(). We need 2 bo addresses, but indirect
+ * constant upload needs at least 4 vec4s.
+ */
+ unsigned size = 4 * 16;
+
+ ctx->batch->tess_addrs_constobj = fd_submit_new_ringbuffer(
+ ctx->batch->submit, size, FD_RINGBUFFER_STREAMING);
+
+ ctx->batch->tess_addrs_constobj->cur += size;
+ }
+ }
+
+ uint32_t index_start = info->index_size ? info->index_bias : draw->start;
+ if (ctx->last.dirty || (ctx->last.index_start != index_start)) {
+ OUT_PKT4(ring, REG_A6XX_VFD_INDEX_OFFSET, 1);
+ OUT_RING(ring, index_start); /* VFD_INDEX_OFFSET */
+ ctx->last.index_start = index_start;
+ }
+
+ if (ctx->last.dirty || (ctx->last.instance_start != info->start_instance)) {
+ OUT_PKT4(ring, REG_A6XX_VFD_INSTANCE_START_OFFSET, 1);
+ OUT_RING(ring, info->start_instance); /* VFD_INSTANCE_START_OFFSET */
+ ctx->last.instance_start = info->start_instance;
+ }
+
+ uint32_t restart_index =
+ info->primitive_restart ? info->restart_index : 0xffffffff;
+ if (ctx->last.dirty || (ctx->last.restart_index != restart_index)) {
+ OUT_PKT4(ring, REG_A6XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, restart_index); /* PC_RESTART_INDEX */
+ ctx->last.restart_index = restart_index;
+ }
+
+ // TODO move fd6_emit_streamout.. I think..
+ if (emit.dirty_groups)
+ fd6_emit_state(ring, &emit);
+
+ /* for debug after a lock up, write a unique counter value
+ * to scratch7 for each draw, to make it easier to match up
+ * register dumps to cmdstream. The combination of IB
+ * (scratch6) and DRAW is enough to "triangulate" the
+ * particular draw that caused lockup.
+ */
+ emit_marker6(ring, 7);
+
+ if (indirect) {
+ if (indirect->count_from_stream_output) {
+ draw_emit_xfb(ring, &draw0, info, indirect);
+ } else {
+ draw_emit_indirect(ring, &draw0, info, indirect, index_offset);
+ }
+ } else {
+ draw_emit(ring, &draw0, info, draw, index_offset);
+ }
+
+ emit_marker6(ring, 7);
+ fd_reset_wfi(ctx->batch);
+
+ if (emit.streamout_mask) {
+ struct fd_ringbuffer *ring = ctx->batch->draw;
+
+ for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
+ if (emit.streamout_mask & (1 << i)) {
+ fd6_event_write(ctx->batch, ring, FLUSH_SO_0 + i, false);
+ }
+ }
+ }
+
+ fd_context_all_clean(ctx);
+
+ return true;
}
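
The PIPE_PRIM_PATCHES path above caps each sub-draw at 2048 vertices and rounds to a whole number of patches with ALIGN_NPOT before sizing the tessparam/tessfactor buffers. A worked example, with ALIGN_NPOT re-stated from util/u_math.h:

#include <stdio.h>

#define ALIGN_NPOT(x, y) (((x) + (y) - 1) / (y) * (y))

int main(void)
{
   const unsigned max_count = 2048;
   unsigned draw_count = 5000;   /* assumed direct-draw vertex count */
   unsigned verts_per_patch = 3; /* triangle patches */

   unsigned count = draw_count < max_count ? draw_count : max_count;
   count = ALIGN_NPOT(count, verts_per_patch);
   printf("sub-draw count = %u\n", count); /* 2049: 683 whole patches */
   return 0;
}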
static void
fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
{
- struct fd_ringbuffer *ring;
- struct fd_screen *screen = batch->ctx->screen;
-
- ring = fd_batch_get_prologue(batch);
-
- emit_marker6(ring, 7);
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
- emit_marker6(ring, 7);
-
- OUT_WFI5(ring);
-
- OUT_REG(ring, A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_bypass));
-
- OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
- .vs_state = true,
- .hs_state = true,
- .ds_state = true,
- .gs_state = true,
- .fs_state = true,
- .cs_state = true,
- .gfx_ibo = true,
- .cs_ibo = true,
- .gfx_shared_const = true,
- .gfx_bindless = 0x1f,
- .cs_bindless = 0x1f
- ));
-
- emit_marker6(ring, 7);
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
- emit_marker6(ring, 7);
-
- OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
- OUT_RING(ring, 0x0);
-
- OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1);
- OUT_RING(ring, 0x0000f410);
-
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
- OUT_RING(ring, A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT(FMT6_16_UNORM) |
- 0x4f00080);
-
- OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
- OUT_RING(ring, A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(FMT6_16_UNORM) |
- 0x4f00080);
-
- fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
- fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
-
- OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
- OUT_RING(ring, fui(depth));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
- OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_16_UNORM) |
- A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
- A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
- OUT_RELOC(ring, zsbuf->lrz, 0, 0, 0);
- OUT_RING(ring, A6XX_RB_2D_DST_PITCH(zsbuf->lrz_pitch * 2).value);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_REG(ring,
- A6XX_GRAS_2D_SRC_TL_X(0),
- A6XX_GRAS_2D_SRC_BR_X(0),
- A6XX_GRAS_2D_SRC_TL_Y(0),
- A6XX_GRAS_2D_SRC_BR_Y(0));
-
- OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
- OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) |
- A6XX_GRAS_2D_DST_TL_Y(0));
- OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) |
- A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1));
-
- fd6_event_write(batch, ring, 0x3f, false);
-
- OUT_WFI5(ring);
-
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
- OUT_RING(ring, screen->info.a6xx.magic.RB_UNKNOWN_8E04_blit);
-
- OUT_PKT7(ring, CP_BLIT, 1);
- OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
- OUT_WFI5(ring);
-
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
- OUT_RING(ring, 0x0); /* RB_UNKNOWN_8E04 */
-
- fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
- fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
- fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
-
- fd6_cache_inv(batch, ring);
+ struct fd_ringbuffer *ring;
+ struct fd_screen *screen = batch->ctx->screen;
+
+ ring = fd_batch_get_prologue(batch);
+
+ emit_marker6(ring, 7);
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
+ emit_marker6(ring, 7);
+
+ OUT_WFI5(ring);
+
+ OUT_REG(ring,
+ A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_bypass));
+
+ OUT_REG(ring,
+ A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
+ .ds_state = true, .gs_state = true,
+ .fs_state = true, .cs_state = true,
+ .gfx_ibo = true, .cs_ibo = true,
+ .gfx_shared_const = true,
+ .gfx_bindless = 0x1f, .cs_bindless = 0x1f));
+
+ emit_marker6(ring, 7);
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
+ emit_marker6(ring, 7);
+
+ OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
+ OUT_RING(ring, 0x0);
+
+ OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1);
+ OUT_RING(ring, 0x0000f410);
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
+ OUT_RING(ring,
+ A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT(FMT6_16_UNORM) | 0x4f00080);
+
+ OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
+ OUT_RING(ring, A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(FMT6_16_UNORM) | 0x4f00080);
+
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+ fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
+
+ OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
+ OUT_RING(ring, fui(depth));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
+ OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_16_UNORM) |
+ A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+ A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
+ OUT_RELOC(ring, zsbuf->lrz, 0, 0, 0);
+ OUT_RING(ring, A6XX_RB_2D_DST_PITCH(zsbuf->lrz_pitch * 2).value);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_REG(ring, A6XX_GRAS_2D_SRC_TL_X(0), A6XX_GRAS_2D_SRC_BR_X(0),
+ A6XX_GRAS_2D_SRC_TL_Y(0), A6XX_GRAS_2D_SRC_BR_Y(0));
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
+ OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
+ OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) |
+ A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1));
+
+ fd6_event_write(batch, ring, 0x3f, false);
+
+ OUT_WFI5(ring);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+ OUT_RING(ring, screen->info.a6xx.magic.RB_UNKNOWN_8E04_blit);
+
+ OUT_PKT7(ring, CP_BLIT, 1);
+ OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+ OUT_WFI5(ring);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+ OUT_RING(ring, 0x0); /* RB_UNKNOWN_8E04 */
+
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
+ fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
+
+ fd6_cache_inv(batch, ring);
}
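
fui(depth) above stores the clear value as raw IEEE-754 float bits in RB_2D_SRC_SOLID_C0. A local equivalent of mesa's fui() for illustration (util/u_math.h uses a union; memcpy is the portable spelling of the same type-pun):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t fui(float f)
{
   uint32_t u;
   memcpy(&u, &f, sizeof(u)); /* reinterpret the float's bit pattern */
   return u;
}

int main(void)
{
   printf("fui(1.0f) = 0x%08x\n", fui(1.0f)); /* prints 0x3f800000 */
   return 0;
}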
-static bool is_z32(enum pipe_format format)
+static bool
+is_z32(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- case PIPE_FORMAT_Z32_UNORM:
- case PIPE_FORMAT_Z32_FLOAT:
- return true;
- default:
- return false;
- }
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case PIPE_FORMAT_Z32_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
+ return true;
+ default:
+ return false;
+ }
}
static bool
fd6_clear(struct fd_context *ctx, unsigned buffers,
- const union pipe_color_union *color, double depth, unsigned stencil)
- assert_dt
+ const union pipe_color_union *color, double depth,
+ unsigned stencil) assert_dt
{
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- const bool has_depth = pfb->zsbuf;
- unsigned color_buffers = buffers >> 2;
-
- /* we need to do multisample clear on 3d pipe, so fall back to u_blitter: */
- if (pfb->samples > 1)
- return false;
-
- /* If we're clearing after draws, fall back to 3D pipe clears. We could
- * use blitter clears in the draw batch but then we'd have to patch up the
- * gmem offsets. This doesn't seem like a useful thing to optimize for
- * however. */
- if (ctx->batch->num_draws > 0)
- return false;
-
- u_foreach_bit(i, color_buffers)
- ctx->batch->clear_color[i] = *color;
- if (buffers & PIPE_CLEAR_DEPTH)
- ctx->batch->clear_depth = depth;
- if (buffers & PIPE_CLEAR_STENCIL)
- ctx->batch->clear_stencil = stencil;
-
- ctx->batch->fast_cleared |= buffers;
-
- if (has_depth && (buffers & PIPE_CLEAR_DEPTH)) {
- struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
- if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) {
- zsbuf->lrz_valid = true;
- zsbuf->lrz_direction = FD_LRZ_UNKNOWN;
- fd6_clear_lrz(ctx->batch, zsbuf, depth);
- }
- }
-
- return true;
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ const bool has_depth = pfb->zsbuf;
+ unsigned color_buffers = buffers >> 2;
+
+ /* we need to do multisample clear on 3d pipe, so fall back to u_blitter: */
+ if (pfb->samples > 1)
+ return false;
+
+ /* If we're clearing after draws, fall back to 3D pipe clears. We could
+ * use blitter clears in the draw batch but then we'd have to patch up the
+ * gmem offsets. This doesn't seem like a useful thing to optimize for
+ * however. */
+ if (ctx->batch->num_draws > 0)
+ return false;
+
+ u_foreach_bit (i, color_buffers)
+ ctx->batch->clear_color[i] = *color;
+ if (buffers & PIPE_CLEAR_DEPTH)
+ ctx->batch->clear_depth = depth;
+ if (buffers & PIPE_CLEAR_STENCIL)
+ ctx->batch->clear_stencil = stencil;
+
+ ctx->batch->fast_cleared |= buffers;
+
+ if (has_depth && (buffers & PIPE_CLEAR_DEPTH)) {
+ struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
+ if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) {
+ zsbuf->lrz_valid = true;
+ zsbuf->lrz_direction = FD_LRZ_UNKNOWN;
+ fd6_clear_lrz(ctx->batch, zsbuf, depth);
+ }
+ }
+
+ return true;
}
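
The "buffers >> 2" shift above works because of how gallium lays out the clear bits: depth is bit 0, stencil bit 1, and the per-render-target color bits start at bit 2 (see PIPE_CLEAR_* in p_defines.h). A sketch with local stand-ins for those defines:

#include <assert.h>

#define CLEAR_DEPTH   (1u << 0)
#define CLEAR_STENCIL (1u << 1)
#define CLEAR_COLOR0  (1u << 2)
#define CLEAR_COLOR1  (1u << 3)

int main(void)
{
   unsigned buffers = CLEAR_DEPTH | CLEAR_COLOR0 | CLEAR_COLOR1;
   unsigned color_buffers = buffers >> 2;
   assert(color_buffers == 0x3); /* RT0 and RT1; depth bit shifted out */
   return 0;
}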
void
-fd6_draw_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd6_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->draw_vbo = fd6_draw_vbo;
- ctx->clear = fd6_clear;
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->draw_vbo = fd6_draw_vbo;
+ ctx->clear = fd6_clear;
}
*/
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_helpers.h"
#include "util/format/u_format.h"
+#include "util/u_helpers.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "util/u_viewport.h"
+#include "common/freedreno_guardband.h"
+#include "freedreno_query_hw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "freedreno_tracepoints.h"
-#include "freedreno_query_hw.h"
-#include "common/freedreno_guardband.h"
-#include "fd6_emit.h"
#include "fd6_blend.h"
#include "fd6_const.h"
#include "fd6_context.h"
+#include "fd6_emit.h"
+#include "fd6_format.h"
#include "fd6_image.h"
#include "fd6_pack.h"
#include "fd6_program.h"
#include "fd6_rasterizer.h"
#include "fd6_texture.h"
-#include "fd6_format.h"
#include "fd6_zsa.h"
/* Border color layout is diff from a4xx/a5xx.. if it turns out to be
*/
struct PACKED bcolor_entry {
- uint32_t fp32[4];
- uint16_t ui16[4];
- int16_t si16[4];
- uint16_t fp16[4];
- uint16_t rgb565;
- uint16_t rgb5a1;
- uint16_t rgba4;
- uint8_t __pad0[2];
- uint8_t ui8[4];
- int8_t si8[4];
- uint32_t rgb10a2;
- uint32_t z24; /* also s8? */
- uint16_t srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
- uint8_t __pad1[56];
+ uint32_t fp32[4];
+ uint16_t ui16[4];
+ int16_t si16[4];
+ uint16_t fp16[4];
+ uint16_t rgb565;
+ uint16_t rgb5a1;
+ uint16_t rgba4;
+ uint8_t __pad0[2];
+ uint8_t ui8[4];
+ int8_t si8[4];
+ uint32_t rgb10a2;
+ uint32_t z24; /* also s8? */
+ uint16_t
+ srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
+ uint8_t __pad1[56];
};
-#define FD6_BORDER_COLOR_SIZE sizeof(struct bcolor_entry)
-#define FD6_BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * FD6_BORDER_COLOR_SIZE)
+#define FD6_BORDER_COLOR_SIZE sizeof(struct bcolor_entry)
+#define FD6_BORDER_COLOR_UPLOAD_SIZE \
+ (2 * PIPE_MAX_SAMPLERS * FD6_BORDER_COLOR_SIZE)
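A quick check of the layout above: the explicit pads bring each entry to 128 bytes, which is presumably the stride the hw's border-color indexing expects (my reading of the layout, not something stated in the code). Summing the field sizes:

#include <assert.h>

int main(void)
{
   /* fp32[4] + ui16[4] + si16[4] + fp16[4] */
   unsigned entry = 16 + 8 + 8 + 8;
   /* rgb565 + rgb5a1 + rgba4 + __pad0[2] */
   entry += 2 + 2 + 2 + 2;
   /* ui8[4] + si8[4] + rgb10a2 + z24 */
   entry += 4 + 4 + 4 + 4;
   /* srgb[4] + __pad1[56] */
   entry += 8 + 56;

   assert(entry == 128);
   return 0;
}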
static void
-setup_border_colors(struct fd_texture_stateobj *tex, struct bcolor_entry *entries)
+setup_border_colors(struct fd_texture_stateobj *tex,
+ struct bcolor_entry *entries)
{
- unsigned i, j;
- STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE);
-
- for (i = 0; i < tex->num_samplers; i++) {
- struct bcolor_entry *e = &entries[i];
- struct pipe_sampler_state *sampler = tex->samplers[i];
- union pipe_color_union *bc;
-
- if (!sampler)
- continue;
-
- bc = &sampler->border_color;
-
- /*
- * XXX HACK ALERT XXX
- *
- * The border colors need to be swizzled in a particular
- * format-dependent order. Even though samplers don't know about
- * formats, we can assume that with a GL state tracker, there's a
- * 1:1 correspondence between sampler and texture. Take advantage
- * of that knowledge.
- */
- if ((i >= tex->num_textures) || !tex->textures[i])
- continue;
-
- struct pipe_sampler_view *view = tex->textures[i];
- enum pipe_format format = view->format;
- const struct util_format_description *desc =
- util_format_description(format);
-
- e->rgb565 = 0;
- e->rgb5a1 = 0;
- e->rgba4 = 0;
- e->rgb10a2 = 0;
- e->z24 = 0;
-
- unsigned char swiz[4];
-
- fd6_tex_swiz(format, swiz,
- view->swizzle_r, view->swizzle_g,
- view->swizzle_b, view->swizzle_a);
-
- for (j = 0; j < 4; j++) {
- int c = swiz[j];
- int cd = c;
-
- /*
- * HACK: for PIPE_FORMAT_X24S8_UINT we end up w/ the
- * stencil border color value in bc->ui[0] but according
- * to desc->swizzle and desc->channel, the .x/.w component
- * is NONE and the stencil value is in the y component.
- * Meanwhile the hardware wants this in the .w component
- * for x24s8 and the .x component for x32_s8x24.
- */
- if ((format == PIPE_FORMAT_X24S8_UINT) ||
- (format == PIPE_FORMAT_X32_S8X24_UINT)) {
- if (j == 0) {
- c = 1;
- cd = (format == PIPE_FORMAT_X32_S8X24_UINT) ? 0 : 3;
- } else {
- continue;
- }
- }
-
- if (c >= 4)
- continue;
-
- if (desc->channel[c].pure_integer) {
- uint16_t clamped;
- switch (desc->channel[c].size) {
- case 2:
- assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
- clamped = CLAMP(bc->ui[j], 0, 0x3);
- break;
- case 8:
- if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
- clamped = CLAMP(bc->i[j], -128, 127);
- else
- clamped = CLAMP(bc->ui[j], 0, 255);
- break;
- case 10:
- assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
- clamped = CLAMP(bc->ui[j], 0, 0x3ff);
- break;
- case 16:
- if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
- clamped = CLAMP(bc->i[j], -32768, 32767);
- else
- clamped = CLAMP(bc->ui[j], 0, 65535);
- break;
- default:
- assert(!"Unexpected bit size");
- case 32:
- clamped = 0;
- break;
- }
- e->fp32[cd] = bc->ui[j];
- e->fp16[cd] = clamped;
- } else {
- float f = bc->f[j];
- float f_u = CLAMP(f, 0, 1);
- float f_s = CLAMP(f, -1, 1);
-
- e->fp32[c] = fui(f);
- e->fp16[c] = _mesa_float_to_half(f);
- e->srgb[c] = _mesa_float_to_half(f_u);
- e->ui16[c] = f_u * 0xffff;
- e->si16[c] = f_s * 0x7fff;
- e->ui8[c] = f_u * 0xff;
- e->si8[c] = f_s * 0x7f;
- if (c == 1)
- e->rgb565 |= (int)(f_u * 0x3f) << 5;
- else if (c < 3)
- e->rgb565 |= (int)(f_u * 0x1f) << (c ? 11 : 0);
- if (c == 3)
- e->rgb5a1 |= (f_u > 0.5) ? 0x8000 : 0;
- else
- e->rgb5a1 |= (int)(f_u * 0x1f) << (c * 5);
- if (c == 3)
- e->rgb10a2 |= (int)(f_u * 0x3) << 30;
- else
- e->rgb10a2 |= (int)(f_u * 0x3ff) << (c * 10);
- e->rgba4 |= (int)(f_u * 0xf) << (c * 4);
- if (c == 0)
- e->z24 = f_u * 0xffffff;
- }
- }
+ unsigned i, j;
+ STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE);
+
+ for (i = 0; i < tex->num_samplers; i++) {
+ struct bcolor_entry *e = &entries[i];
+ struct pipe_sampler_state *sampler = tex->samplers[i];
+ union pipe_color_union *bc;
+
+ if (!sampler)
+ continue;
+
+ bc = &sampler->border_color;
+
+ /*
+ * XXX HACK ALERT XXX
+ *
+ * The border colors need to be swizzled in a particular
+ * format-dependent order. Even though samplers don't know about
+ * formats, we can assume that with a GL state tracker, there's a
+ * 1:1 correspondence between sampler and texture. Take advantage
+ * of that knowledge.
+ */
+ if ((i >= tex->num_textures) || !tex->textures[i])
+ continue;
+
+ struct pipe_sampler_view *view = tex->textures[i];
+ enum pipe_format format = view->format;
+ const struct util_format_description *desc =
+ util_format_description(format);
+
+ e->rgb565 = 0;
+ e->rgb5a1 = 0;
+ e->rgba4 = 0;
+ e->rgb10a2 = 0;
+ e->z24 = 0;
+
+ unsigned char swiz[4];
+
+ fd6_tex_swiz(format, swiz, view->swizzle_r, view->swizzle_g,
+ view->swizzle_b, view->swizzle_a);
+
+ for (j = 0; j < 4; j++) {
+ int c = swiz[j];
+ int cd = c;
+
+ /*
+ * HACK: for PIPE_FORMAT_X24S8_UINT we end up w/ the
+ * stencil border color value in bc->ui[0] but according
+ * to desc->swizzle and desc->channel, the .x/.w component
+ * is NONE and the stencil value is in the y component.
+ * Meanwhile the hardware wants this in the .w component
+ * for x24s8 and the .x component for x32_s8x24.
+ */
+ if ((format == PIPE_FORMAT_X24S8_UINT) ||
+ (format == PIPE_FORMAT_X32_S8X24_UINT)) {
+ if (j == 0) {
+ c = 1;
+ cd = (format == PIPE_FORMAT_X32_S8X24_UINT) ? 0 : 3;
+ } else {
+ continue;
+ }
+ }
+
+ if (c >= 4)
+ continue;
+
+ if (desc->channel[c].pure_integer) {
+ uint16_t clamped;
+ switch (desc->channel[c].size) {
+ case 2:
+ assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
+ clamped = CLAMP(bc->ui[j], 0, 0x3);
+ break;
+ case 8:
+ if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
+ clamped = CLAMP(bc->i[j], -128, 127);
+ else
+ clamped = CLAMP(bc->ui[j], 0, 255);
+ break;
+ case 10:
+ assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
+ clamped = CLAMP(bc->ui[j], 0, 0x3ff);
+ break;
+ case 16:
+ if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
+ clamped = CLAMP(bc->i[j], -32768, 32767);
+ else
+ clamped = CLAMP(bc->ui[j], 0, 65535);
+ break;
+ default:
+ assert(!"Unexpected bit size");
+ case 32:
+ clamped = 0;
+ break;
+ }
+ e->fp32[cd] = bc->ui[j];
+ e->fp16[cd] = clamped;
+ } else {
+ float f = bc->f[j];
+ float f_u = CLAMP(f, 0, 1);
+ float f_s = CLAMP(f, -1, 1);
+
+ e->fp32[c] = fui(f);
+ e->fp16[c] = _mesa_float_to_half(f);
+ e->srgb[c] = _mesa_float_to_half(f_u);
+ e->ui16[c] = f_u * 0xffff;
+ e->si16[c] = f_s * 0x7fff;
+ e->ui8[c] = f_u * 0xff;
+ e->si8[c] = f_s * 0x7f;
+ if (c == 1)
+ e->rgb565 |= (int)(f_u * 0x3f) << 5;
+ else if (c < 3)
+ e->rgb565 |= (int)(f_u * 0x1f) << (c ? 11 : 0);
+ if (c == 3)
+ e->rgb5a1 |= (f_u > 0.5) ? 0x8000 : 0;
+ else
+ e->rgb5a1 |= (int)(f_u * 0x1f) << (c * 5);
+ if (c == 3)
+ e->rgb10a2 |= (int)(f_u * 0x3) << 30;
+ else
+ e->rgb10a2 |= (int)(f_u * 0x3ff) << (c * 10);
+ e->rgba4 |= (int)(f_u * 0xf) << (c * 4);
+ if (c == 0)
+ e->z24 = f_u * 0xffffff;
+ }
+ }
#ifdef DEBUG
- memset(&e->__pad0, 0, sizeof(e->__pad0));
- memset(&e->__pad1, 0, sizeof(e->__pad1));
+ memset(&e->__pad0, 0, sizeof(e->__pad0));
+ memset(&e->__pad1, 0, sizeof(e->__pad1));
#endif
- }
+ }
}
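The packed-format cases above are easiest to see in isolation. A minimal standalone sketch of the rgb565 rule (red in bits [4:0], green in [10:5], blue in [15:11], matching the shifts in the loop; illustration only, not driver code):

#include <assert.h>
#include <stdint.h>

static uint16_t
pack_rgb565(float r, float g, float b)
{
   /* scale each [0,1] float by the channel max, as the loop above does */
   return (uint16_t)((int)(r * 0x1f) |
                     ((int)(g * 0x3f) << 5) |
                     ((int)(b * 0x1f) << 11));
}

int main(void)
{
   assert(pack_rgb565(1.0f, 1.0f, 1.0f) == 0xffff);
   assert(pack_rgb565(1.0f, 0.0f, 0.0f) == 0x001f); /* red in low bits */
   return 0;
}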
static void
-emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring)
- assert_dt
+emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) assert_dt
{
- struct fd6_context *fd6_ctx = fd6_context(ctx);
- struct bcolor_entry *entries;
- unsigned off;
- void *ptr;
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct bcolor_entry *entries;
+ unsigned off;
+ void *ptr;
- STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE);
+ STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE);
- u_upload_alloc(fd6_ctx->border_color_uploader,
- 0, FD6_BORDER_COLOR_UPLOAD_SIZE,
- FD6_BORDER_COLOR_UPLOAD_SIZE, &off,
- &fd6_ctx->border_color_buf,
- &ptr);
+ u_upload_alloc(fd6_ctx->border_color_uploader, 0,
+ FD6_BORDER_COLOR_UPLOAD_SIZE, FD6_BORDER_COLOR_UPLOAD_SIZE,
+ &off, &fd6_ctx->border_color_buf, &ptr);
- entries = ptr;
+ entries = ptr;
- setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0]);
- setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT],
- &entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers]);
+ setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0]);
+ setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT],
+ &entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers]);
- OUT_PKT4(ring, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR, 2);
- OUT_RELOC(ring, fd_resource(fd6_ctx->border_color_buf)->bo, off, 0, 0);
+ OUT_PKT4(ring, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR, 2);
+ OUT_RELOC(ring, fd_resource(fd6_ctx->border_color_buf)->bo, off, 0, 0);
- u_upload_unmap(fd6_ctx->border_color_uploader);
+ u_upload_unmap(fd6_ctx->border_color_uploader);
}
static void
-fd6_emit_fb_tex(struct fd_ringbuffer *state, struct fd_context *ctx)
- assert_dt
+fd6_emit_fb_tex(struct fd_ringbuffer *state, struct fd_context *ctx) assert_dt
{
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- struct pipe_surface *psurf = pfb->cbufs[0];
- struct fd_resource *rsc = fd_resource(psurf->texture);
-
- uint32_t texconst0 = fd6_tex_const_0(psurf->texture, psurf->u.tex.level,
- psurf->format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
- PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);
-
- /* always TILE6_2 mode in GMEM.. which also means no swap: */
- texconst0 &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
- texconst0 |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
-
- OUT_RING(state, texconst0);
- OUT_RING(state, A6XX_TEX_CONST_1_WIDTH(pfb->width) |
- A6XX_TEX_CONST_1_HEIGHT(pfb->height));
- OUT_RINGP(state, A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D),
- &ctx->batch->fb_read_patches);
- OUT_RING(state, A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size));
-
- OUT_RING(state, A6XX_TEX_CONST_4_BASE_LO(ctx->screen->gmem_base));
- OUT_RING(state, A6XX_TEX_CONST_5_BASE_HI(ctx->screen->gmem_base >> 32) |
- A6XX_TEX_CONST_5_DEPTH(1));
- OUT_RING(state, 0); /* texconst6 */
- OUT_RING(state, 0); /* texconst7 */
- OUT_RING(state, 0); /* texconst8 */
- OUT_RING(state, 0); /* texconst9 */
- OUT_RING(state, 0); /* texconst10 */
- OUT_RING(state, 0); /* texconst11 */
- OUT_RING(state, 0);
- OUT_RING(state, 0);
- OUT_RING(state, 0);
- OUT_RING(state, 0);
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ struct pipe_surface *psurf = pfb->cbufs[0];
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+
+ uint32_t texconst0 = fd6_tex_const_0(
+ psurf->texture, psurf->u.tex.level, psurf->format, PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);
+
+ /* always TILE6_2 mode in GMEM.. which also means no swap: */
+ texconst0 &=
+ ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
+ texconst0 |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
+
+ OUT_RING(state, texconst0);
+ OUT_RING(state, A6XX_TEX_CONST_1_WIDTH(pfb->width) |
+ A6XX_TEX_CONST_1_HEIGHT(pfb->height));
+ OUT_RINGP(state, A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D),
+ &ctx->batch->fb_read_patches);
+ OUT_RING(state, A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size));
+
+ OUT_RING(state, A6XX_TEX_CONST_4_BASE_LO(ctx->screen->gmem_base));
+ OUT_RING(state, A6XX_TEX_CONST_5_BASE_HI(ctx->screen->gmem_base >> 32) |
+ A6XX_TEX_CONST_5_DEPTH(1));
+ OUT_RING(state, 0); /* texconst6 */
+ OUT_RING(state, 0); /* texconst7 */
+ OUT_RING(state, 0); /* texconst8 */
+ OUT_RING(state, 0); /* texconst9 */
+ OUT_RING(state, 0); /* texconst10 */
+ OUT_RING(state, 0); /* texconst11 */
+ OUT_RING(state, 0);
+ OUT_RING(state, 0);
+ OUT_RING(state, 0);
+ OUT_RING(state, 0);
}
bool
fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum pipe_shader_type type, struct fd_texture_stateobj *tex,
- unsigned bcolor_offset,
- /* can be NULL if no image/SSBO/fb state to merge in: */
- const struct ir3_shader_variant *v)
+ enum pipe_shader_type type, struct fd_texture_stateobj *tex,
+ unsigned bcolor_offset,
+ /* can be NULL if no image/SSBO/fb state to merge in: */
+ const struct ir3_shader_variant *v)
{
- bool needs_border = false;
- unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
- enum a6xx_state_block sb;
-
- switch (type) {
- case PIPE_SHADER_VERTEX:
- sb = SB6_VS_TEX;
- opcode = CP_LOAD_STATE6_GEOM;
- tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP;
- tex_const_reg = REG_A6XX_SP_VS_TEX_CONST;
- tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT;
- break;
- case PIPE_SHADER_TESS_CTRL:
- sb = SB6_HS_TEX;
- opcode = CP_LOAD_STATE6_GEOM;
- tex_samp_reg = REG_A6XX_SP_HS_TEX_SAMP;
- tex_const_reg = REG_A6XX_SP_HS_TEX_CONST;
- tex_count_reg = REG_A6XX_SP_HS_TEX_COUNT;
- break;
- case PIPE_SHADER_TESS_EVAL:
- sb = SB6_DS_TEX;
- opcode = CP_LOAD_STATE6_GEOM;
- tex_samp_reg = REG_A6XX_SP_DS_TEX_SAMP;
- tex_const_reg = REG_A6XX_SP_DS_TEX_CONST;
- tex_count_reg = REG_A6XX_SP_DS_TEX_COUNT;
- break;
- case PIPE_SHADER_GEOMETRY:
- sb = SB6_GS_TEX;
- opcode = CP_LOAD_STATE6_GEOM;
- tex_samp_reg = REG_A6XX_SP_GS_TEX_SAMP;
- tex_const_reg = REG_A6XX_SP_GS_TEX_CONST;
- tex_count_reg = REG_A6XX_SP_GS_TEX_COUNT;
- break;
- case PIPE_SHADER_FRAGMENT:
- sb = SB6_FS_TEX;
- opcode = CP_LOAD_STATE6_FRAG;
- tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP;
- tex_const_reg = REG_A6XX_SP_FS_TEX_CONST;
- tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT;
- break;
- case PIPE_SHADER_COMPUTE:
- sb = SB6_CS_TEX;
- opcode = CP_LOAD_STATE6_FRAG;
- tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP;
- tex_const_reg = REG_A6XX_SP_CS_TEX_CONST;
- tex_count_reg = REG_A6XX_SP_CS_TEX_COUNT;
- break;
- default:
- unreachable("bad state block");
- }
-
- if (tex->num_samplers > 0) {
- struct fd_ringbuffer *state =
- fd_ringbuffer_new_object(ctx->pipe, tex->num_samplers * 4 * 4);
- for (unsigned i = 0; i < tex->num_samplers; i++) {
- static const struct fd6_sampler_stateobj dummy_sampler = {};
- const struct fd6_sampler_stateobj *sampler = tex->samplers[i] ?
- fd6_sampler_stateobj(tex->samplers[i]) : &dummy_sampler;
- OUT_RING(state, sampler->texsamp0);
- OUT_RING(state, sampler->texsamp1);
- OUT_RING(state, sampler->texsamp2 |
- A6XX_TEX_SAMP_2_BCOLOR(i + bcolor_offset));
- OUT_RING(state, sampler->texsamp3);
- needs_border |= sampler->needs_border;
- }
-
- /* output sampler state: */
- OUT_PKT7(ring, opcode, 3);
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
- CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE6_0_NUM_UNIT(tex->num_samplers));
- OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
-
- OUT_PKT4(ring, tex_samp_reg, 2);
- OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
-
- fd_ringbuffer_del(state);
- }
-
- unsigned num_merged_textures = tex->num_textures;
- unsigned num_textures = tex->num_textures;
- if (v) {
- num_merged_textures += v->image_mapping.num_tex;
-
- if (v->fb_read)
- num_merged_textures++;
-
- /* There could be more bound textures than what the shader uses,
- * which isn't known at shader compile time. So when we are
- * merging tex state, only emit the textures that the shader
- * uses (since the image/SSBO related tex state comes immediately
- * after).
- */
- num_textures = v->image_mapping.tex_base;
- }
-
- if (num_merged_textures > 0) {
- struct fd_ringbuffer *state =
- fd_ringbuffer_new_object(ctx->pipe, num_merged_textures * 16 * 4);
- for (unsigned i = 0; i < num_textures; i++) {
- const struct fd6_pipe_sampler_view *view;
-
- if (tex->textures[i]) {
- view = fd6_pipe_sampler_view(tex->textures[i]);
- if (unlikely(view->rsc_seqno != fd_resource(view->base.texture)->seqno)) {
- fd6_sampler_view_update(ctx,
- fd6_pipe_sampler_view(tex->textures[i]));
- }
- } else {
- static const struct fd6_pipe_sampler_view dummy_view = {};
- view = &dummy_view;
- }
-
- OUT_RING(state, view->texconst0);
- OUT_RING(state, view->texconst1);
- OUT_RING(state, view->texconst2);
- OUT_RING(state, view->texconst3);
-
- if (view->ptr1) {
- OUT_RELOC(state, view->ptr1->bo, view->offset1,
- (uint64_t)view->texconst5 << 32, 0);
- } else {
- OUT_RING(state, 0x00000000);
- OUT_RING(state, view->texconst5);
- }
-
- OUT_RING(state, view->texconst6);
-
- if (view->ptr2) {
- OUT_RELOC(state, view->ptr2->bo, view->offset2, 0, 0);
- } else {
- OUT_RING(state, 0);
- OUT_RING(state, 0);
- }
-
- OUT_RING(state, view->texconst9);
- OUT_RING(state, view->texconst10);
- OUT_RING(state, view->texconst11);
- OUT_RING(state, 0);
- OUT_RING(state, 0);
- OUT_RING(state, 0);
- OUT_RING(state, 0);
- }
-
- if (v) {
- const struct ir3_ibo_mapping *mapping = &v->image_mapping;
- struct fd_shaderbuf_stateobj *buf = &ctx->shaderbuf[type];
- struct fd_shaderimg_stateobj *img = &ctx->shaderimg[type];
-
- for (unsigned i = 0; i < mapping->num_tex; i++) {
- unsigned idx = mapping->tex_to_image[i];
- if (idx & IBO_SSBO) {
- fd6_emit_ssbo_tex(state, &buf->sb[idx & ~IBO_SSBO]);
- } else {
- fd6_emit_image_tex(state, &img->si[idx]);
- }
- }
-
- if (v->fb_read) {
- fd6_emit_fb_tex(state, ctx);
- }
- }
-
- /* emit texture state: */
- OUT_PKT7(ring, opcode, 3);
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
- CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE6_0_NUM_UNIT(num_merged_textures));
- OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
-
- OUT_PKT4(ring, tex_const_reg, 2);
- OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
-
- fd_ringbuffer_del(state);
- }
-
- OUT_PKT4(ring, tex_count_reg, 1);
- OUT_RING(ring, num_merged_textures);
-
- return needs_border;
+ bool needs_border = false;
+ unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
+ enum a6xx_state_block sb;
+
+ switch (type) {
+ case PIPE_SHADER_VERTEX:
+ sb = SB6_VS_TEX;
+ opcode = CP_LOAD_STATE6_GEOM;
+ tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP;
+ tex_const_reg = REG_A6XX_SP_VS_TEX_CONST;
+ tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT;
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ sb = SB6_HS_TEX;
+ opcode = CP_LOAD_STATE6_GEOM;
+ tex_samp_reg = REG_A6XX_SP_HS_TEX_SAMP;
+ tex_const_reg = REG_A6XX_SP_HS_TEX_CONST;
+ tex_count_reg = REG_A6XX_SP_HS_TEX_COUNT;
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ sb = SB6_DS_TEX;
+ opcode = CP_LOAD_STATE6_GEOM;
+ tex_samp_reg = REG_A6XX_SP_DS_TEX_SAMP;
+ tex_const_reg = REG_A6XX_SP_DS_TEX_CONST;
+ tex_count_reg = REG_A6XX_SP_DS_TEX_COUNT;
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ sb = SB6_GS_TEX;
+ opcode = CP_LOAD_STATE6_GEOM;
+ tex_samp_reg = REG_A6XX_SP_GS_TEX_SAMP;
+ tex_const_reg = REG_A6XX_SP_GS_TEX_CONST;
+ tex_count_reg = REG_A6XX_SP_GS_TEX_COUNT;
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ sb = SB6_FS_TEX;
+ opcode = CP_LOAD_STATE6_FRAG;
+ tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP;
+ tex_const_reg = REG_A6XX_SP_FS_TEX_CONST;
+ tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT;
+ break;
+ case PIPE_SHADER_COMPUTE:
+ sb = SB6_CS_TEX;
+ opcode = CP_LOAD_STATE6_FRAG;
+ tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP;
+ tex_const_reg = REG_A6XX_SP_CS_TEX_CONST;
+ tex_count_reg = REG_A6XX_SP_CS_TEX_COUNT;
+ break;
+ default:
+ unreachable("bad state block");
+ }
+
+ if (tex->num_samplers > 0) {
+ struct fd_ringbuffer *state =
+ fd_ringbuffer_new_object(ctx->pipe, tex->num_samplers * 4 * 4);
+ for (unsigned i = 0; i < tex->num_samplers; i++) {
+ static const struct fd6_sampler_stateobj dummy_sampler = {};
+ const struct fd6_sampler_stateobj *sampler =
+ tex->samplers[i] ? fd6_sampler_stateobj(tex->samplers[i])
+ : &dummy_sampler;
+ OUT_RING(state, sampler->texsamp0);
+ OUT_RING(state, sampler->texsamp1);
+ OUT_RING(state, sampler->texsamp2 |
+ A6XX_TEX_SAMP_2_BCOLOR(i + bcolor_offset));
+ OUT_RING(state, sampler->texsamp3);
+ needs_border |= sampler->needs_border;
+ }
+
+ /* output sampler state: */
+ OUT_PKT7(ring, opcode, 3);
+ OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE6_0_NUM_UNIT(tex->num_samplers));
+ OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
+
+ OUT_PKT4(ring, tex_samp_reg, 2);
+ OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
+
+ fd_ringbuffer_del(state);
+ }
+
+ unsigned num_merged_textures = tex->num_textures;
+ unsigned num_textures = tex->num_textures;
+ if (v) {
+ num_merged_textures += v->image_mapping.num_tex;
+
+ if (v->fb_read)
+ num_merged_textures++;
+
+ /* There could be more bound textures than what the shader uses,
+ * which isn't known at shader compile time. So when we are
+ * merging tex state, only emit the textures that the shader
+ * uses (since the image/SSBO related tex state comes immediately
+ * after).
+ */
+ num_textures = v->image_mapping.tex_base;
+ }
+
+ if (num_merged_textures > 0) {
+ struct fd_ringbuffer *state =
+ fd_ringbuffer_new_object(ctx->pipe, num_merged_textures * 16 * 4);
+ for (unsigned i = 0; i < num_textures; i++) {
+ const struct fd6_pipe_sampler_view *view;
+
+ if (tex->textures[i]) {
+ view = fd6_pipe_sampler_view(tex->textures[i]);
+ if (unlikely(view->rsc_seqno !=
+ fd_resource(view->base.texture)->seqno)) {
+ fd6_sampler_view_update(ctx,
+ fd6_pipe_sampler_view(tex->textures[i]));
+ }
+ } else {
+ static const struct fd6_pipe_sampler_view dummy_view = {};
+ view = &dummy_view;
+ }
+
+ OUT_RING(state, view->texconst0);
+ OUT_RING(state, view->texconst1);
+ OUT_RING(state, view->texconst2);
+ OUT_RING(state, view->texconst3);
+
+ if (view->ptr1) {
+ OUT_RELOC(state, view->ptr1->bo, view->offset1,
+ (uint64_t)view->texconst5 << 32, 0);
+ } else {
+ OUT_RING(state, 0x00000000);
+ OUT_RING(state, view->texconst5);
+ }
+
+ OUT_RING(state, view->texconst6);
+
+ if (view->ptr2) {
+ OUT_RELOC(state, view->ptr2->bo, view->offset2, 0, 0);
+ } else {
+ OUT_RING(state, 0);
+ OUT_RING(state, 0);
+ }
+
+ OUT_RING(state, view->texconst9);
+ OUT_RING(state, view->texconst10);
+ OUT_RING(state, view->texconst11);
+ OUT_RING(state, 0);
+ OUT_RING(state, 0);
+ OUT_RING(state, 0);
+ OUT_RING(state, 0);
+ }
+
+ if (v) {
+ const struct ir3_ibo_mapping *mapping = &v->image_mapping;
+ struct fd_shaderbuf_stateobj *buf = &ctx->shaderbuf[type];
+ struct fd_shaderimg_stateobj *img = &ctx->shaderimg[type];
+
+ for (unsigned i = 0; i < mapping->num_tex; i++) {
+ unsigned idx = mapping->tex_to_image[i];
+ if (idx & IBO_SSBO) {
+ fd6_emit_ssbo_tex(state, &buf->sb[idx & ~IBO_SSBO]);
+ } else {
+ fd6_emit_image_tex(state, &img->si[idx]);
+ }
+ }
+
+ if (v->fb_read) {
+ fd6_emit_fb_tex(state, ctx);
+ }
+ }
+
+ /* emit texture state: */
+ OUT_PKT7(ring, opcode, 3);
+ OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE6_0_NUM_UNIT(num_merged_textures));
+ OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
+
+ OUT_PKT4(ring, tex_const_reg, 2);
+ OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
+
+ fd_ringbuffer_del(state);
+ }
+
+ OUT_PKT4(ring, tex_count_reg, 1);
+ OUT_RING(ring, num_merged_textures);
+
+ return needs_border;
}
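One small pattern above worth calling out: unbound sampler/texture slots are filled from zero-initialized static dummies, so the descriptor-emit loops never branch on NULL when writing out state. The same idea in miniature (types here are stand-ins, not the driver's):

#include <stdio.h>

struct descriptor {
   unsigned words[4];
};

static const struct descriptor dummy; /* static const => all zeros */

static const struct descriptor *
bound_or_dummy(const struct descriptor *d)
{
   return d ? d : &dummy;
}

int main(void)
{
   const struct descriptor *d = bound_or_dummy(NULL);
   printf("word0: %u\n", d->words[0]); /* prints 0 */
   return 0;
}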
/* Emits combined texture state, which also includes any Image/SSBO
*/
static bool
fd6_emit_combined_textures(struct fd_ringbuffer *ring, struct fd6_emit *emit,
- enum pipe_shader_type type, const struct ir3_shader_variant *v)
- assert_dt
+ enum pipe_shader_type type,
+ const struct ir3_shader_variant *v) assert_dt
{
- struct fd_context *ctx = emit->ctx;
- bool needs_border = false;
-
- static const struct {
- enum fd6_state_id state_id;
- unsigned enable_mask;
- } s[PIPE_SHADER_TYPES] = {
- [PIPE_SHADER_VERTEX] = { FD6_GROUP_VS_TEX, ENABLE_ALL },
- [PIPE_SHADER_TESS_CTRL] = { FD6_GROUP_HS_TEX, ENABLE_ALL },
- [PIPE_SHADER_TESS_EVAL] = { FD6_GROUP_DS_TEX, ENABLE_ALL },
- [PIPE_SHADER_GEOMETRY] = { FD6_GROUP_GS_TEX, ENABLE_ALL },
- [PIPE_SHADER_FRAGMENT] = { FD6_GROUP_FS_TEX, ENABLE_DRAW },
- };
-
- debug_assert(s[type].state_id);
-
- if (!v->image_mapping.num_tex && !v->fb_read) {
- /* in the fast-path, when we don't have to mix in any image/SSBO
- * related texture state, we can just look up the stateobj and
- * re-emit that:
- *
- * Also, framebuffer-read is a slow-path because an extra
- * texture needs to be inserted.
- *
- * TODO we can probably simplify things if we also treated
- * border_color as a slow-path.. this way the tex state key
- * wouldn't depend on bcolor_offset.. but fb_read might rather
- * be *somehow* a fast-path if we eventually used it for PLS.
- * I suppose there would be no harm in just *always* inserting
- * an fb_read texture?
- */
- if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) &&
- ctx->tex[type].num_textures > 0) {
- struct fd6_texture_state *tex = fd6_texture_state(ctx,
- type, &ctx->tex[type]);
-
- needs_border |= tex->needs_border;
-
- fd6_emit_add_group(emit, tex->stateobj, s[type].state_id,
- s[type].enable_mask);
-
- fd6_texture_state_reference(&tex, NULL);
- }
- } else {
- /* In the slow-path, create a one-shot texture state object
- * if either TEX|PROG|SSBO|IMAGE state is dirty:
- */
- if ((ctx->dirty_shader[type] &
- (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
- FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) ||
- v->fb_read) {
- struct fd_texture_stateobj *tex = &ctx->tex[type];
- struct fd_ringbuffer *stateobj =
- fd_submit_new_ringbuffer(ctx->batch->submit,
- 0x1000, FD_RINGBUFFER_STREAMING);
- unsigned bcolor_offset =
- fd6_border_color_offset(ctx, type, tex);
-
- needs_border |= fd6_emit_textures(ctx, stateobj, type, tex,
- bcolor_offset, v);
-
- fd6_emit_take_group(emit, stateobj, s[type].state_id,
- s[type].enable_mask);
- }
- }
-
- return needs_border;
+ struct fd_context *ctx = emit->ctx;
+ bool needs_border = false;
+
+ static const struct {
+ enum fd6_state_id state_id;
+ unsigned enable_mask;
+ } s[PIPE_SHADER_TYPES] = {
+ [PIPE_SHADER_VERTEX] = {FD6_GROUP_VS_TEX, ENABLE_ALL},
+ [PIPE_SHADER_TESS_CTRL] = {FD6_GROUP_HS_TEX, ENABLE_ALL},
+ [PIPE_SHADER_TESS_EVAL] = {FD6_GROUP_DS_TEX, ENABLE_ALL},
+ [PIPE_SHADER_GEOMETRY] = {FD6_GROUP_GS_TEX, ENABLE_ALL},
+ [PIPE_SHADER_FRAGMENT] = {FD6_GROUP_FS_TEX, ENABLE_DRAW},
+ };
+
+ debug_assert(s[type].state_id);
+
+ if (!v->image_mapping.num_tex && !v->fb_read) {
+ /* in the fast-path, when we don't have to mix in any image/SSBO
+ * related texture state, we can just look up the stateobj and
+ * re-emit that:
+ *
+ * Also, framebuffer-read is a slow-path because an extra
+ * texture needs to be inserted.
+ *
+ * TODO we can probably simplify things if we also treated
+ * border_color as a slow-path.. this way the tex state key
+ * wouldn't depend on bcolor_offset.. but fb_read might rather
+ * be *somehow* a fast-path if we eventually used it for PLS.
+ * I suppose there would be no harm in just *always* inserting
+ * an fb_read texture?
+ */
+ if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) &&
+ ctx->tex[type].num_textures > 0) {
+ struct fd6_texture_state *tex =
+ fd6_texture_state(ctx, type, &ctx->tex[type]);
+
+ needs_border |= tex->needs_border;
+
+ fd6_emit_add_group(emit, tex->stateobj, s[type].state_id,
+ s[type].enable_mask);
+
+ fd6_texture_state_reference(&tex, NULL);
+ }
+ } else {
+ /* In the slow-path, create a one-shot texture state object
+ * if either TEX|PROG|SSBO|IMAGE state is dirty:
+ */
+ if ((ctx->dirty_shader[type] &
+ (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE |
+ FD_DIRTY_SHADER_SSBO)) ||
+ v->fb_read) {
+ struct fd_texture_stateobj *tex = &ctx->tex[type];
+ struct fd_ringbuffer *stateobj = fd_submit_new_ringbuffer(
+ ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
+ unsigned bcolor_offset = fd6_border_color_offset(ctx, type, tex);
+
+ needs_border |=
+ fd6_emit_textures(ctx, stateobj, type, tex, bcolor_offset, v);
+
+ fd6_emit_take_group(emit, stateobj, s[type].state_id,
+ s[type].enable_mask);
+ }
+ }
+
+ return needs_border;
}
static struct fd_ringbuffer *
-build_vbo_state(struct fd6_emit *emit)
- assert_dt
+build_vbo_state(struct fd6_emit *emit) assert_dt
{
- const struct fd_vertex_state *vtx = emit->vtx;
-
- struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit,
- 4 * (1 + vtx->vertexbuf.count * 4), FD_RINGBUFFER_STREAMING);
-
- OUT_PKT4(ring, REG_A6XX_VFD_FETCH(0), 4 * vtx->vertexbuf.count);
- for (int32_t j = 0; j < vtx->vertexbuf.count; j++) {
- const struct pipe_vertex_buffer *vb = &vtx->vertexbuf.vb[j];
- struct fd_resource *rsc = fd_resource(vb->buffer.resource);
- if (rsc == NULL) {
- OUT_RING(ring, 0);
- OUT_RING(ring, 0);
- OUT_RING(ring, 0);
- OUT_RING(ring, 0);
- } else {
- uint32_t off = vb->buffer_offset;
- uint32_t size = fd_bo_size(rsc->bo) - off;
-
- OUT_RELOC(ring, rsc->bo, off, 0, 0);
- OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */
- OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */
- }
- }
-
- return ring;
+ const struct fd_vertex_state *vtx = emit->vtx;
+
+ struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
+ emit->ctx->batch->submit, 4 * (1 + vtx->vertexbuf.count * 4),
+ FD_RINGBUFFER_STREAMING);
+
+ OUT_PKT4(ring, REG_A6XX_VFD_FETCH(0), 4 * vtx->vertexbuf.count);
+ for (int32_t j = 0; j < vtx->vertexbuf.count; j++) {
+ const struct pipe_vertex_buffer *vb = &vtx->vertexbuf.vb[j];
+ struct fd_resource *rsc = fd_resource(vb->buffer.resource);
+ if (rsc == NULL) {
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ } else {
+ uint32_t off = vb->buffer_offset;
+ uint32_t size = fd_bo_size(rsc->bo) - off;
+
+ OUT_RELOC(ring, rsc->bo, off, 0, 0);
+ OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */
+ OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */
+ }
+ }
+
+ return ring;
}
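The ring size above decodes as: one PKT4 header dword plus four dwords per vertex buffer (the two-dword relocated base, SIZE, and STRIDE), times four bytes per dword. Spelled out (illustrative arithmetic only):

#include <assert.h>

int main(void)
{
   unsigned vertexbuf_count = 3;
   unsigned dwords = 1 + vertexbuf_count * 4; /* header + 4 per buffer */
   unsigned bytes = 4 * dwords;

   assert(bytes == 52); /* 13 dwords for three vertex buffers */
   return 0;
}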
static enum a6xx_ztest_mode
-compute_ztest_mode(struct fd6_emit *emit, bool lrz_valid)
- assert_dt
+compute_ztest_mode(struct fd6_emit *emit, bool lrz_valid) assert_dt
{
- struct fd_context *ctx = emit->ctx;
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
- const struct ir3_shader_variant *fs = emit->fs;
-
- if (fs->shader->nir->info.fs.early_fragment_tests)
- return A6XX_EARLY_Z;
-
- if (fs->no_earlyz || fs->writes_pos || !zsa->base.depth_enabled ||
- fs->writes_stencilref) {
- return A6XX_LATE_Z;
- } else if ((fs->has_kill || zsa->alpha_test) &&
- (zsa->writes_zs || !pfb->zsbuf)) {
- /* Slightly odd, but seems like the hw wants us to select
- * LATE_Z mode if there is no depth buffer + discard. Either
- * that, or when occlusion query is enabled. See:
- *
- * dEQP-GLES31.functional.fbo.no_attachments.*
- */
- return lrz_valid ? A6XX_EARLY_LRZ_LATE_Z : A6XX_LATE_Z;
- } else {
- return A6XX_EARLY_Z;
- }
+ struct fd_context *ctx = emit->ctx;
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
+ const struct ir3_shader_variant *fs = emit->fs;
+
+ if (fs->shader->nir->info.fs.early_fragment_tests)
+ return A6XX_EARLY_Z;
+
+ if (fs->no_earlyz || fs->writes_pos || !zsa->base.depth_enabled ||
+ fs->writes_stencilref) {
+ return A6XX_LATE_Z;
+ } else if ((fs->has_kill || zsa->alpha_test) &&
+ (zsa->writes_zs || !pfb->zsbuf)) {
+ /* Slightly odd, but seems like the hw wants us to select
+ * LATE_Z mode if there is no depth buffer + discard. Either
+ * that, or when occlusion query is enabled. See:
+ *
+ * dEQP-GLES31.functional.fbo.no_attachments.*
+ */
+ return lrz_valid ? A6XX_EARLY_LRZ_LATE_Z : A6XX_LATE_Z;
+ } else {
+ return A6XX_EARLY_Z;
+ }
}
/**
* to invalidate lrz.
*/
static struct fd6_lrz_state
-compute_lrz_state(struct fd6_emit *emit, bool binning_pass)
- assert_dt
+compute_lrz_state(struct fd6_emit *emit, bool binning_pass) assert_dt
{
- struct fd_context *ctx = emit->ctx;
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- const struct ir3_shader_variant *fs = emit->fs;
- struct fd6_lrz_state lrz;
-
- if (!pfb->zsbuf) {
- memset(&lrz, 0, sizeof(lrz));
- if (!binning_pass) {
- lrz.z_mode = compute_ztest_mode(emit, false);
- }
- return lrz;
- }
-
- struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
- struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
-
- lrz = zsa->lrz;
-
- /* normalize lrz state: */
- if (blend->reads_dest || fs->writes_pos || fs->no_earlyz || fs->has_kill) {
- lrz.write = false;
- if (binning_pass)
- lrz.enable = false;
- }
-
- /* if we change depthfunc direction, bail out on using LRZ. The
- * LRZ buffer encodes a min/max depth value per block, but if
- * we switch from GT/GE <-> LT/LE, those values cannot be
- * interpreted properly.
- */
- if (zsa->base.depth_enabled &&
- (rsc->lrz_direction != FD_LRZ_UNKNOWN) &&
- (rsc->lrz_direction != lrz.direction)) {
- rsc->lrz_valid = false;
- }
-
- if (zsa->invalidate_lrz || !rsc->lrz_valid) {
- rsc->lrz_valid = false;
- memset(&lrz, 0, sizeof(lrz));
- }
-
- if (fs->no_earlyz || fs->writes_pos) {
- lrz.enable = false;
- lrz.write = false;
- lrz.test = false;
- }
-
- if (!binning_pass) {
- lrz.z_mode = compute_ztest_mode(emit, rsc->lrz_valid);
- }
-
- /* Once we start writing to the real depth buffer, we lock in the
- * direction for LRZ.. if we have to skip a LRZ write for any
- * reason, it is still safe to have LRZ until there is a direction
- * reversal. Prior to the reversal, since we disabled LRZ writes
- * in the "unsafe" cases, this just means that the LRZ test may
- * not early-discard some things that end up not passing a later
- * test (ie. be overly conservative). But once you have a reversal
- * of direction, it is possible to increase/decrease the z value
- * to the point where the overly-conservative test is incorrect.
- */
- if (zsa->base.depth_writemask) {
- rsc->lrz_direction = lrz.direction;
- }
-
- return lrz;
+ struct fd_context *ctx = emit->ctx;
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ const struct ir3_shader_variant *fs = emit->fs;
+ struct fd6_lrz_state lrz;
+
+ if (!pfb->zsbuf) {
+ memset(&lrz, 0, sizeof(lrz));
+ if (!binning_pass) {
+ lrz.z_mode = compute_ztest_mode(emit, false);
+ }
+ return lrz;
+ }
+
+ struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
+ struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+
+ lrz = zsa->lrz;
+
+ /* normalize lrz state: */
+ if (blend->reads_dest || fs->writes_pos || fs->no_earlyz || fs->has_kill) {
+ lrz.write = false;
+ if (binning_pass)
+ lrz.enable = false;
+ }
+
+ /* if we change depthfunc direction, bail out on using LRZ. The
+ * LRZ buffer encodes a min/max depth value per block, but if
+ * we switch from GT/GE <-> LT/LE, those values cannot be
+ * interpreted properly.
+ */
+ if (zsa->base.depth_enabled && (rsc->lrz_direction != FD_LRZ_UNKNOWN) &&
+ (rsc->lrz_direction != lrz.direction)) {
+ rsc->lrz_valid = false;
+ }
+
+ if (zsa->invalidate_lrz || !rsc->lrz_valid) {
+ rsc->lrz_valid = false;
+ memset(&lrz, 0, sizeof(lrz));
+ }
+
+ if (fs->no_earlyz || fs->writes_pos) {
+ lrz.enable = false;
+ lrz.write = false;
+ lrz.test = false;
+ }
+
+ if (!binning_pass) {
+ lrz.z_mode = compute_ztest_mode(emit, rsc->lrz_valid);
+ }
+
+ /* Once we start writing to the real depth buffer, we lock in the
+ * direction for LRZ.. if we have to skip a LRZ write for any
+ * reason, it is still safe to have LRZ until there is a direction
+ * reversal. Prior to the reversal, since we disabled LRZ writes
+ * in the "unsafe" cases, this just means that the LRZ test may
+ * not early-discard some things that end up not passing a later
+ * test (ie. be overly conservative). But once you have a reversal
+ * of direction, it is possible to increase/decrease the z value
+ * to the point where the overly-conservative test is incorrect.
+ */
+ if (zsa->base.depth_writemask) {
+ rsc->lrz_direction = lrz.direction;
+ }
+
+ return lrz;
}
static struct fd_ringbuffer *
-build_lrz(struct fd6_emit *emit, bool binning_pass)
- assert_dt
+build_lrz(struct fd6_emit *emit, bool binning_pass) assert_dt
{
- struct fd_context *ctx = emit->ctx;
- struct fd6_context *fd6_ctx = fd6_context(ctx);
- struct fd6_lrz_state lrz =
- compute_lrz_state(emit, binning_pass);
-
- /* If the LRZ state has not changed, we can skip the emit: */
- if (!ctx->last.dirty &&
- !memcmp(&fd6_ctx->last.lrz[binning_pass], &lrz, sizeof(lrz)))
- return NULL;
-
- fd6_ctx->last.lrz[binning_pass] = lrz;
-
- struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(ctx->batch->submit,
- 8*4, FD_RINGBUFFER_STREAMING);
-
- OUT_REG(ring, A6XX_GRAS_LRZ_CNTL(
- .enable = lrz.enable,
- .lrz_write = lrz.write,
- .greater = lrz.direction == FD_LRZ_GREATER,
- .z_test_enable = lrz.test,
- ));
- OUT_REG(ring, A6XX_RB_LRZ_CNTL(
- .enable = lrz.enable,
- ));
-
- OUT_REG(ring, A6XX_RB_DEPTH_PLANE_CNTL(
- .z_mode = lrz.z_mode,
- ));
-
- OUT_REG(ring, A6XX_GRAS_SU_DEPTH_PLANE_CNTL(
- .z_mode = lrz.z_mode,
- ));
-
- return ring;
+ struct fd_context *ctx = emit->ctx;
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct fd6_lrz_state lrz = compute_lrz_state(emit, binning_pass);
+
+ /* If the LRZ state has not changed, we can skip the emit: */
+ if (!ctx->last.dirty &&
+ !memcmp(&fd6_ctx->last.lrz[binning_pass], &lrz, sizeof(lrz)))
+ return NULL;
+
+ fd6_ctx->last.lrz[binning_pass] = lrz;
+
+ struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
+ ctx->batch->submit, 8 * 4, FD_RINGBUFFER_STREAMING);
+
+ OUT_REG(ring,
+ A6XX_GRAS_LRZ_CNTL(.enable = lrz.enable, .lrz_write = lrz.write,
+ .greater = lrz.direction == FD_LRZ_GREATER,
+ .z_test_enable = lrz.test, ));
+ OUT_REG(ring, A6XX_RB_LRZ_CNTL(.enable = lrz.enable, ));
+
+ OUT_REG(ring, A6XX_RB_DEPTH_PLANE_CNTL(.z_mode = lrz.z_mode, ));
+
+ OUT_REG(ring, A6XX_GRAS_SU_DEPTH_PLANE_CNTL(.z_mode = lrz.z_mode, ));
+
+ return ring;
}
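The early-out above is a small memoization: cache the last-emitted LRZ state per pass and memcmp before building a new stateobj. The same pattern in miniature (the struct below is a stand-in for fd6_lrz_state, not the driver's type):

#include <stdbool.h>
#include <string.h>

struct lrz_state {
   bool enable, write, test;
};

static struct lrz_state last[2]; /* per binning/draw pass, like last.lrz[] */

static bool
lrz_changed(bool binning_pass, const struct lrz_state *cur)
{
   if (!memcmp(&last[binning_pass], cur, sizeof(*cur)))
      return false; /* unchanged: skip the emit */
   last[binning_pass] = *cur;
   return true;
}

int main(void)
{
   struct lrz_state s = {.enable = true};
   return (lrz_changed(false, &s) && !lrz_changed(false, &s)) ? 0 : 1;
}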
static struct fd_ringbuffer *
-build_scissor(struct fd6_emit *emit)
- assert_dt
+build_scissor(struct fd6_emit *emit) assert_dt
{
- struct fd_context *ctx = emit->ctx;
- struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
-
- struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit, 3*4,
- FD_RINGBUFFER_STREAMING);
-
- OUT_REG(ring,
- A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0,
- .x = scissor->minx,
- .y = scissor->miny
- ),
- A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0,
- .x = MAX2(scissor->maxx, 1) - 1,
- .y = MAX2(scissor->maxy, 1) - 1
- )
- );
-
- ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
- ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
- ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
- ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
-
- return ring;
+ struct fd_context *ctx = emit->ctx;
+ struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+
+ struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
+ emit->ctx->batch->submit, 3 * 4, FD_RINGBUFFER_STREAMING);
+
+ OUT_REG(
+ ring,
+ A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = scissor->minx, .y = scissor->miny),
+ A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = MAX2(scissor->maxx, 1) - 1,
+ .y = MAX2(scissor->maxy, 1) - 1));
+
+ ctx->batch->max_scissor.minx =
+ MIN2(ctx->batch->max_scissor.minx, scissor->minx);
+ ctx->batch->max_scissor.miny =
+ MIN2(ctx->batch->max_scissor.miny, scissor->miny);
+ ctx->batch->max_scissor.maxx =
+ MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
+ ctx->batch->max_scissor.maxy =
+ MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
+
+ return ring;
}
/* Combination of FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD |
* FD_DIRTY_PROG | FD_DIRTY_DUAL_BLEND
*/
static struct fd_ringbuffer *
-build_prog_fb_rast(struct fd6_emit *emit)
- assert_dt
+build_prog_fb_rast(struct fd6_emit *emit) assert_dt
{
- struct fd_context *ctx = emit->ctx;
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
- const struct ir3_shader_variant *fs = emit->fs;
+ struct fd_context *ctx = emit->ctx;
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
+ const struct ir3_shader_variant *fs = emit->fs;
- struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(ctx->batch->submit,
- 9 * 4, FD_RINGBUFFER_STREAMING);
+ struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
+ ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING);
- unsigned nr = pfb->nr_cbufs;
+ unsigned nr = pfb->nr_cbufs;
- if (ctx->rasterizer->rasterizer_discard)
- nr = 0;
+ if (ctx->rasterizer->rasterizer_discard)
+ nr = 0;
- struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
+ struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
- if (blend->use_dual_src_blend)
- nr++;
+ if (blend->use_dual_src_blend)
+ nr++;
- OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2);
- OUT_RING(ring, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
- COND(fs->writes_smask && pfb->samples > 1,
- A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK) |
- COND(fs->writes_stencilref,
- A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_STENCILREF) |
- COND(blend->use_dual_src_blend,
- A6XX_RB_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
- OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr));
+ OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2);
+ OUT_RING(ring, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
+ COND(fs->writes_smask && pfb->samples > 1,
+ A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK) |
+ COND(fs->writes_stencilref,
+ A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_STENCILREF) |
+ COND(blend->use_dual_src_blend,
+ A6XX_RB_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
+ OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr));
- OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL1, 1);
- OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL1_MRT(nr));
+ OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL1, 1);
+ OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL1_MRT(nr));
- unsigned mrt_components = 0;
- for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
- if (!pfb->cbufs[i])
- continue;
- mrt_components |= 0xf << (i * 4);
- }
+ unsigned mrt_components = 0;
+ for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
+ if (!pfb->cbufs[i])
+ continue;
+ mrt_components |= 0xf << (i * 4);
+ }
- /* dual source blending has an extra fs output in the 2nd slot */
- if (blend->use_dual_src_blend)
- mrt_components |= 0xf << 4;
+ /* dual source blending has an extra fs output in the 2nd slot */
+ if (blend->use_dual_src_blend)
+ mrt_components |= 0xf << 4;
- mrt_components &= prog->mrt_components;
+ mrt_components &= prog->mrt_components;
- OUT_REG(ring, A6XX_SP_FS_RENDER_COMPONENTS(.dword = mrt_components));
- OUT_REG(ring, A6XX_RB_RENDER_COMPONENTS(.dword = mrt_components));
+ OUT_REG(ring, A6XX_SP_FS_RENDER_COMPONENTS(.dword = mrt_components));
+ OUT_REG(ring, A6XX_RB_RENDER_COMPONENTS(.dword = mrt_components));
- return ring;
+ return ring;
}
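The component mask built above uses one nibble per render target, 0xf meaning all four channels of MRT i are written. A standalone sketch:

#include <assert.h>

int main(void)
{
   unsigned mrt_components = 0;

   /* three bound color buffers: */
   for (unsigned i = 0; i < 3; i++)
      mrt_components |= 0xf << (i * 4);
   assert(mrt_components == 0xfff);

   /* with one cbuf plus dual-source blend, slot 1 is also enabled: */
   unsigned dual = (0xf << 0) | (0xf << 4);
   assert(dual == 0xff);
   return 0;
}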
static struct fd_ringbuffer *
-build_blend_color(struct fd6_emit *emit)
- assert_dt
+build_blend_color(struct fd6_emit *emit) assert_dt
{
- struct fd_context *ctx = emit->ctx;
- struct pipe_blend_color *bcolor = &ctx->blend_color;
- struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(ctx->batch->submit,
- 5*4, FD_RINGBUFFER_STREAMING);
-
- OUT_REG(ring,
- A6XX_RB_BLEND_RED_F32(bcolor->color[0]),
- A6XX_RB_BLEND_GREEN_F32(bcolor->color[1]),
- A6XX_RB_BLEND_BLUE_F32(bcolor->color[2]),
- A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3])
- );
-
- return ring;
+ struct fd_context *ctx = emit->ctx;
+ struct pipe_blend_color *bcolor = &ctx->blend_color;
+ struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
+ ctx->batch->submit, 5 * 4, FD_RINGBUFFER_STREAMING);
+
+ OUT_REG(ring, A6XX_RB_BLEND_RED_F32(bcolor->color[0]),
+ A6XX_RB_BLEND_GREEN_F32(bcolor->color[1]),
+ A6XX_RB_BLEND_BLUE_F32(bcolor->color[2]),
+ A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
+
+ return ring;
}
static struct fd_ringbuffer *
-build_ibo(struct fd6_emit *emit)
- assert_dt
+build_ibo(struct fd6_emit *emit) assert_dt
{
- struct fd_context *ctx = emit->ctx;
-
- if (emit->hs) {
- debug_assert(ir3_shader_nibo(emit->hs) == 0);
- debug_assert(ir3_shader_nibo(emit->ds) == 0);
- }
- if (emit->gs) {
- debug_assert(ir3_shader_nibo(emit->gs) == 0);
- }
-
- struct fd_ringbuffer *ibo_state =
- fd6_build_ibo_state(ctx, emit->fs, PIPE_SHADER_FRAGMENT);
- struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
- ctx->batch->submit, 0x100, FD_RINGBUFFER_STREAMING);
-
- OUT_PKT7(ring, CP_LOAD_STATE6, 3);
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
- CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
- CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(emit->fs)));
- OUT_RB(ring, ibo_state);
-
- OUT_PKT4(ring, REG_A6XX_SP_IBO, 2);
- OUT_RB(ring, ibo_state);
-
- /* TODO if we used CP_SET_DRAW_STATE for compute shaders, we could
- * de-duplicate this from program->config_stateobj
- */
- OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1);
- OUT_RING(ring, ir3_shader_nibo(emit->fs));
-
- fd_ringbuffer_del(ibo_state);
-
- return ring;
+ struct fd_context *ctx = emit->ctx;
+
+ if (emit->hs) {
+ debug_assert(ir3_shader_nibo(emit->hs) == 0);
+ debug_assert(ir3_shader_nibo(emit->ds) == 0);
+ }
+ if (emit->gs) {
+ debug_assert(ir3_shader_nibo(emit->gs) == 0);
+ }
+
+ struct fd_ringbuffer *ibo_state =
+ fd6_build_ibo_state(ctx, emit->fs, PIPE_SHADER_FRAGMENT);
+ struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
+ ctx->batch->submit, 0x100, FD_RINGBUFFER_STREAMING);
+
+ OUT_PKT7(ring, CP_LOAD_STATE6, 3);
+ OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
+ CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(emit->fs)));
+ OUT_RB(ring, ibo_state);
+
+ OUT_PKT4(ring, REG_A6XX_SP_IBO, 2);
+ OUT_RB(ring, ibo_state);
+
+ /* TODO if we used CP_SET_DRAW_STATE for compute shaders, we could
+ * de-duplicate this from program->config_stateobj
+ */
+ OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1);
+ OUT_RING(ring, ir3_shader_nibo(emit->fs));
+
+ fd_ringbuffer_del(ibo_state);
+
+ return ring;
}
static void
-fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit)
- assert_dt
+fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
{
- struct fd_context *ctx = emit->ctx;
- const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
- struct ir3_stream_output_info *info = prog->stream_output;
- struct fd_streamout_stateobj *so = &ctx->streamout;
-
- emit->streamout_mask = 0;
-
- if (!info)
- return;
-
- for (unsigned i = 0; i < so->num_targets; i++) {
- struct fd_stream_output_target *target = fd_stream_output_target(so->targets[i]);
-
- if (!target)
- continue;
-
- target->stride = info->stride[i];
-
- OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE(i), 3);
- /* VPC_SO[i].BUFFER_BASE_LO: */
- OUT_RELOC(ring, fd_resource(target->base.buffer)->bo, 0, 0, 0);
- OUT_RING(ring, target->base.buffer_size + target->base.buffer_offset);
-
- struct fd_bo *offset_bo = fd_resource(target->offset_buf)->bo;
-
- if (so->reset & (1 << i)) {
- assert(so->offsets[i] == 0);
-
- OUT_PKT7(ring, CP_MEM_WRITE, 3);
- OUT_RELOC(ring, offset_bo, 0, 0, 0);
- OUT_RING(ring, target->base.buffer_offset);
-
- OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 1);
- OUT_RING(ring, target->base.buffer_offset);
- } else {
- OUT_PKT7(ring, CP_MEM_TO_REG, 3);
- OUT_RING(ring, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(i)) |
- CP_MEM_TO_REG_0_SHIFT_BY_2 | CP_MEM_TO_REG_0_UNK31 |
- CP_MEM_TO_REG_0_CNT(0));
- OUT_RELOC(ring, offset_bo, 0, 0, 0);
- }
-
- // After a draw, the HW writes the new offset to offset_bo
- OUT_PKT4(ring, REG_A6XX_VPC_SO_FLUSH_BASE(i), 2);
- OUT_RELOC(ring, offset_bo, 0, 0, 0);
-
- so->reset &= ~(1 << i);
-
- emit->streamout_mask |= (1 << i);
- }
-
- if (emit->streamout_mask) {
- fd6_emit_add_group(emit, prog->streamout_stateobj, FD6_GROUP_SO, ENABLE_ALL);
- } else {
- /* If we transition from a draw with streamout to one without, turn
- * off streamout.
- */
- if (ctx->last.streamout_mask != 0) {
- struct fd_ringbuffer *obj = fd_submit_new_ringbuffer(emit->ctx->batch->submit,
- 5 * 4, FD_RINGBUFFER_STREAMING);
-
- OUT_PKT7(obj, CP_CONTEXT_REG_BUNCH, 4);
- OUT_RING(obj, REG_A6XX_VPC_SO_CNTL);
- OUT_RING(obj, 0);
- OUT_RING(obj, REG_A6XX_VPC_SO_STREAM_CNTL);
- OUT_RING(obj, 0);
-
- fd6_emit_take_group(emit, obj, FD6_GROUP_SO, ENABLE_ALL);
- }
- }
-
- ctx->last.streamout_mask = emit->streamout_mask;
+ struct fd_context *ctx = emit->ctx;
+ const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
+ struct ir3_stream_output_info *info = prog->stream_output;
+ struct fd_streamout_stateobj *so = &ctx->streamout;
+
+ emit->streamout_mask = 0;
+
+ if (!info)
+ return;
+
+ for (unsigned i = 0; i < so->num_targets; i++) {
+ struct fd_stream_output_target *target =
+ fd_stream_output_target(so->targets[i]);
+
+ if (!target)
+ continue;
+
+ target->stride = info->stride[i];
+
+ OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE(i), 3);
+ /* VPC_SO[i].BUFFER_BASE_LO: */
+ OUT_RELOC(ring, fd_resource(target->base.buffer)->bo, 0, 0, 0);
+ OUT_RING(ring, target->base.buffer_size + target->base.buffer_offset);
+
+ struct fd_bo *offset_bo = fd_resource(target->offset_buf)->bo;
+
+ if (so->reset & (1 << i)) {
+ assert(so->offsets[i] == 0);
+
+ OUT_PKT7(ring, CP_MEM_WRITE, 3);
+ OUT_RELOC(ring, offset_bo, 0, 0, 0);
+ OUT_RING(ring, target->base.buffer_offset);
+
+ OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 1);
+ OUT_RING(ring, target->base.buffer_offset);
+ } else {
+ OUT_PKT7(ring, CP_MEM_TO_REG, 3);
+ OUT_RING(ring, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(i)) |
+ CP_MEM_TO_REG_0_SHIFT_BY_2 | CP_MEM_TO_REG_0_UNK31 |
+ CP_MEM_TO_REG_0_CNT(0));
+ OUT_RELOC(ring, offset_bo, 0, 0, 0);
+ }
+
+ // After a draw, the HW writes the new offset to offset_bo
+ OUT_PKT4(ring, REG_A6XX_VPC_SO_FLUSH_BASE(i), 2);
+ OUT_RELOC(ring, offset_bo, 0, 0, 0);
+
+ so->reset &= ~(1 << i);
+
+ emit->streamout_mask |= (1 << i);
+ }
+
+ if (emit->streamout_mask) {
+ fd6_emit_add_group(emit, prog->streamout_stateobj, FD6_GROUP_SO,
+ ENABLE_ALL);
+ } else {
+ /* If we transition from a draw with streamout to one without, turn
+ * off streamout.
+ */
+ if (ctx->last.streamout_mask != 0) {
+ struct fd_ringbuffer *obj = fd_submit_new_ringbuffer(
+ emit->ctx->batch->submit, 5 * 4, FD_RINGBUFFER_STREAMING);
+
+ OUT_PKT7(obj, CP_CONTEXT_REG_BUNCH, 4);
+ OUT_RING(obj, REG_A6XX_VPC_SO_CNTL);
+ OUT_RING(obj, 0);
+ OUT_RING(obj, REG_A6XX_VPC_SO_STREAM_CNTL);
+ OUT_RING(obj, 0);
+
+ fd6_emit_take_group(emit, obj, FD6_GROUP_SO, ENABLE_ALL);
+ }
+ }
+
+ ctx->last.streamout_mask = emit->streamout_mask;
}
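A host-side model of the offset handling above may help: on a reset (freshly set target) the offset is seeded both in memory and in the register via CP_MEM_WRITE, otherwise the value the hw wrote back after the previous draw is loaded into VPC_SO_BUFFER_OFFSET with CP_MEM_TO_REG, so each draw appends. A sketch (illustration only; `mem_offset` stands in for the contents of offset_bo):

#include <assert.h>
#include <stdint.h>

struct so_target {
   uint32_t buffer_offset; /* from the pipe state */
   uint32_t mem_offset;    /* models what offset_bo holds */
};

static uint32_t
begin_draw_offset(struct so_target *t, int reset)
{
   if (reset)
      t->mem_offset = t->buffer_offset; /* CP_MEM_WRITE path */
   return t->mem_offset;                /* CP_MEM_TO_REG path */
}

int main(void)
{
   struct so_target t = {.buffer_offset = 64};

   assert(begin_draw_offset(&t, 1) == 64); /* first draw after set */
   t.mem_offset += 256;                    /* hw appends 256 bytes */
   assert(begin_draw_offset(&t, 0) == 320);
   return 0;
}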
/**
* Stuff that less frequently changes and isn't (yet) moved into stategroups
*/
static void
-fd6_emit_non_ring(struct fd_ringbuffer *ring, struct fd6_emit *emit)
- assert_dt
+fd6_emit_non_ring(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
{
- struct fd_context *ctx = emit->ctx;
- const enum fd_dirty_3d_state dirty = emit->dirty;
-
- if (dirty & FD_DIRTY_STENCIL_REF) {
- struct pipe_stencil_ref *sr = &ctx->stencil_ref;
-
- OUT_PKT4(ring, REG_A6XX_RB_STENCILREF, 1);
- OUT_RING(ring, A6XX_RB_STENCILREF_REF(sr->ref_value[0]) |
- A6XX_RB_STENCILREF_BFREF(sr->ref_value[1]));
- }
-
- if (dirty & FD_DIRTY_VIEWPORT) {
- struct pipe_scissor_state *scissor = &ctx->viewport_scissor;
-
- OUT_REG(ring,
- A6XX_GRAS_CL_VPORT_XOFFSET(0, ctx->viewport.translate[0]),
- A6XX_GRAS_CL_VPORT_XSCALE(0, ctx->viewport.scale[0]),
- A6XX_GRAS_CL_VPORT_YOFFSET(0, ctx->viewport.translate[1]),
- A6XX_GRAS_CL_VPORT_YSCALE(0, ctx->viewport.scale[1]),
- A6XX_GRAS_CL_VPORT_ZOFFSET(0, ctx->viewport.translate[2]),
- A6XX_GRAS_CL_VPORT_ZSCALE(0, ctx->viewport.scale[2])
- );
-
- OUT_REG(ring,
- A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0,
- .x = scissor->minx,
- .y = scissor->miny
- ),
- A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0,
- .x = MAX2(scissor->maxx, 1) - 1,
- .y = MAX2(scissor->maxy, 1) - 1
- )
- );
-
- unsigned guardband_x =
- fd_calc_guardband(ctx->viewport.translate[0], ctx->viewport.scale[0],
- false);
- unsigned guardband_y =
- fd_calc_guardband(ctx->viewport.translate[1], ctx->viewport.scale[1],
- false);
-
- OUT_REG(ring, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ(
- .horz = guardband_x,
- .vert = guardband_y
- )
- );
- }
-
- /* The clamp ranges are only used when the rasterizer wants depth
- * clamping.
- */
- if ((dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER)) &&
- fd_depth_clamp_enabled(ctx)) {
- float zmin, zmax;
- util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
- &zmin, &zmax);
-
- OUT_REG(ring,
- A6XX_GRAS_CL_Z_CLAMP_MIN(0, zmin),
- A6XX_GRAS_CL_Z_CLAMP_MAX(0, zmax));
-
- OUT_REG(ring,
- A6XX_RB_Z_CLAMP_MIN(zmin),
- A6XX_RB_Z_CLAMP_MAX(zmax));
- }
+ struct fd_context *ctx = emit->ctx;
+ const enum fd_dirty_3d_state dirty = emit->dirty;
+
+ if (dirty & FD_DIRTY_STENCIL_REF) {
+ struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+ OUT_PKT4(ring, REG_A6XX_RB_STENCILREF, 1);
+ OUT_RING(ring, A6XX_RB_STENCILREF_REF(sr->ref_value[0]) |
+ A6XX_RB_STENCILREF_BFREF(sr->ref_value[1]));
+ }
+
+ if (dirty & FD_DIRTY_VIEWPORT) {
+ struct pipe_scissor_state *scissor = &ctx->viewport_scissor;
+
+ OUT_REG(ring, A6XX_GRAS_CL_VPORT_XOFFSET(0, ctx->viewport.translate[0]),
+ A6XX_GRAS_CL_VPORT_XSCALE(0, ctx->viewport.scale[0]),
+ A6XX_GRAS_CL_VPORT_YOFFSET(0, ctx->viewport.translate[1]),
+ A6XX_GRAS_CL_VPORT_YSCALE(0, ctx->viewport.scale[1]),
+ A6XX_GRAS_CL_VPORT_ZOFFSET(0, ctx->viewport.translate[2]),
+ A6XX_GRAS_CL_VPORT_ZSCALE(0, ctx->viewport.scale[2]));
+
+ OUT_REG(
+ ring,
+ A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x = scissor->minx,
+ .y = scissor->miny),
+ A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x = MAX2(scissor->maxx, 1) - 1,
+ .y = MAX2(scissor->maxy, 1) - 1));
+
+ unsigned guardband_x = fd_calc_guardband(ctx->viewport.translate[0],
+ ctx->viewport.scale[0], false);
+ unsigned guardband_y = fd_calc_guardband(ctx->viewport.translate[1],
+ ctx->viewport.scale[1], false);
+
+ OUT_REG(ring, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ(.horz = guardband_x,
+ .vert = guardband_y));
+ }
+
+ /* The clamp ranges are only used when the rasterizer wants depth
+ * clamping.
+ */
+ if ((dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER)) &&
+ fd_depth_clamp_enabled(ctx)) {
+ float zmin, zmax;
+ util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
+ &zmin, &zmax);
+
+ OUT_REG(ring, A6XX_GRAS_CL_Z_CLAMP_MIN(0, zmin),
+ A6XX_GRAS_CL_Z_CLAMP_MAX(0, zmax));
+
+ OUT_REG(ring, A6XX_RB_Z_CLAMP_MIN(zmin), A6XX_RB_Z_CLAMP_MAX(zmax));
+ }
}
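For the clamp range: util_viewport_zmin_zmax() returns the image of NDC z under the viewport transform, z_window = z_ndc * scale + translate, over [0,1] when clip_halfz is set and [-1,1] otherwise. A sketch of that computation (my restatement of the helper's behavior, for illustration):

#include <assert.h>

static void
viewport_zmin_zmax(float translate, float scale, int halfz,
                   float *zmin, float *zmax)
{
   float a = halfz ? translate : translate - scale;
   float b = translate + scale;

   *zmin = a < b ? a : b;
   *zmax = a < b ? b : a;
}

int main(void)
{
   float zmin, zmax;

   /* GL-style depth range [0, 1] with clip_halfz disabled: */
   viewport_zmin_zmax(0.5f, 0.5f, 0, &zmin, &zmax);
   assert(zmin == 0.0f && zmax == 1.0f);
   return 0;
}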
void
fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
{
- struct fd_context *ctx = emit->ctx;
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
- const struct ir3_shader_variant *vs = emit->vs;
- const struct ir3_shader_variant *hs = emit->hs;
- const struct ir3_shader_variant *ds = emit->ds;
- const struct ir3_shader_variant *gs = emit->gs;
- const struct ir3_shader_variant *fs = emit->fs;
- bool needs_border = false;
-
- emit_marker6(ring, 5);
-
- /* NOTE: we track fb_read differently than _BLEND_ENABLED since we
- * might decide to do sysmem in some cases when blend is enabled:
- */
- if (fs->fb_read)
- ctx->batch->gmem_reason |= FD_GMEM_FB_READ;
-
- u_foreach_bit (b, emit->dirty_groups) {
- enum fd6_state_id group = b;
- struct fd_ringbuffer *state = NULL;
- uint32_t enable_mask = ENABLE_ALL;
-
- switch (group) {
- case FD6_GROUP_VTXSTATE:
- state = fd6_vertex_stateobj(ctx->vtx.vtx)->stateobj;
- fd_ringbuffer_ref(state);
- break;
- case FD6_GROUP_VBO:
- state = build_vbo_state(emit);
- break;
- case FD6_GROUP_ZSA:
- state = fd6_zsa_state(ctx,
- util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])),
- fd_depth_clamp_enabled(ctx));
- fd_ringbuffer_ref(state);
- break;
- case FD6_GROUP_LRZ:
- state = build_lrz(emit, false);
- if (!state)
- continue;
- enable_mask = ENABLE_DRAW;
- break;
- case FD6_GROUP_LRZ_BINNING:
- state = build_lrz(emit, true);
- if (!state)
- continue;
- enable_mask = CP_SET_DRAW_STATE__0_BINNING;
- break;
- case FD6_GROUP_SCISSOR:
- state = build_scissor(emit);
- break;
- case FD6_GROUP_PROG:
- fd6_emit_add_group(emit, prog->config_stateobj, FD6_GROUP_PROG_CONFIG, ENABLE_ALL);
- fd6_emit_add_group(emit, prog->stateobj, FD6_GROUP_PROG, ENABLE_DRAW);
- fd6_emit_add_group(emit, prog->binning_stateobj,
- FD6_GROUP_PROG_BINNING, CP_SET_DRAW_STATE__0_BINNING);
-
- /* emit remaining streaming program state, ie. what depends on
- * other emit state, so cannot be pre-baked.
- */
- fd6_emit_take_group(emit, fd6_program_interp_state(emit),
- FD6_GROUP_PROG_INTERP, ENABLE_DRAW);
- continue;
- case FD6_GROUP_RASTERIZER:
- state = fd6_rasterizer_state(ctx, emit->primitive_restart);
- fd_ringbuffer_ref(state);
- break;
- case FD6_GROUP_PROG_FB_RAST:
- state = build_prog_fb_rast(emit);
- break;
- case FD6_GROUP_BLEND:
- state = fd6_blend_variant(ctx->blend, pfb->samples, ctx->sample_mask)->stateobj;
- fd_ringbuffer_ref(state);
- break;
- case FD6_GROUP_BLEND_COLOR:
- state = build_blend_color(emit);
- break;
- case FD6_GROUP_IBO:
- state = build_ibo(emit);
- fd6_emit_ibo_consts(emit, fs, PIPE_SHADER_FRAGMENT, ring);
- break;
- case FD6_GROUP_CONST:
- state = fd6_build_user_consts(emit);
- break;
- case FD6_GROUP_VS_DRIVER_PARAMS:
- state = fd6_build_vs_driver_params(emit);
- break;
- case FD6_GROUP_PRIMITIVE_PARAMS:
- state = fd6_build_tess_consts(emit);
- break;
- case FD6_GROUP_VS_TEX:
- needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vs);
- continue;
- case FD6_GROUP_HS_TEX:
- if (hs) {
- needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_TESS_CTRL, hs);
- }
- continue;
- case FD6_GROUP_DS_TEX:
- if (ds) {
- needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_TESS_EVAL, ds);
- }
- continue;
- case FD6_GROUP_GS_TEX:
- if (gs) {
- needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_GEOMETRY, gs);
- }
- continue;
- case FD6_GROUP_FS_TEX:
- needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fs);
- continue;
- case FD6_GROUP_SO:
- fd6_emit_streamout(ring, emit);
- continue;
- case FD6_GROUP_NON_GROUP:
- fd6_emit_non_ring(ring, emit);
- continue;
- default:
- unreachable("bad state group");
- }
-
- fd6_emit_take_group(emit, state, group, enable_mask);
- }
-
- if (needs_border)
- emit_border_color(ctx, ring);
-
- if (emit->num_groups > 0) {
- OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups);
- for (unsigned i = 0; i < emit->num_groups; i++) {
- struct fd6_state_group *g = &emit->groups[i];
- unsigned n = g->stateobj ?
- fd_ringbuffer_size(g->stateobj) / 4 : 0;
-
- debug_assert((g->enable_mask & ~ENABLE_ALL) == 0);
-
- if (n == 0) {
- OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
- CP_SET_DRAW_STATE__0_DISABLE |
- g->enable_mask |
- CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- } else {
- OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(n) |
- g->enable_mask |
- CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
- OUT_RB(ring, g->stateobj);
- }
-
- if (g->stateobj)
- fd_ringbuffer_del(g->stateobj);
- }
- emit->num_groups = 0;
- }
+ struct fd_context *ctx = emit->ctx;
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
+ const struct ir3_shader_variant *vs = emit->vs;
+ const struct ir3_shader_variant *hs = emit->hs;
+ const struct ir3_shader_variant *ds = emit->ds;
+ const struct ir3_shader_variant *gs = emit->gs;
+ const struct ir3_shader_variant *fs = emit->fs;
+ bool needs_border = false;
+
+ emit_marker6(ring, 5);
+
+ /* NOTE: we track fb_read differently than _BLEND_ENABLED since we
+ * might decide to do sysmem in some cases when blend is enabled:
+ */
+ if (fs->fb_read)
+ ctx->batch->gmem_reason |= FD_GMEM_FB_READ;
+
+ u_foreach_bit (b, emit->dirty_groups) {
+ enum fd6_state_id group = b;
+ struct fd_ringbuffer *state = NULL;
+ uint32_t enable_mask = ENABLE_ALL;
+
+ switch (group) {
+ case FD6_GROUP_VTXSTATE:
+ state = fd6_vertex_stateobj(ctx->vtx.vtx)->stateobj;
+ fd_ringbuffer_ref(state);
+ break;
+ case FD6_GROUP_VBO:
+ state = build_vbo_state(emit);
+ break;
+ case FD6_GROUP_ZSA:
+ state = fd6_zsa_state(
+ ctx,
+ util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])),
+ fd_depth_clamp_enabled(ctx));
+ fd_ringbuffer_ref(state);
+ break;
+ case FD6_GROUP_LRZ:
+ state = build_lrz(emit, false);
+ if (!state)
+ continue;
+ enable_mask = ENABLE_DRAW;
+ break;
+ case FD6_GROUP_LRZ_BINNING:
+ state = build_lrz(emit, true);
+ if (!state)
+ continue;
+ enable_mask = CP_SET_DRAW_STATE__0_BINNING;
+ break;
+ case FD6_GROUP_SCISSOR:
+ state = build_scissor(emit);
+ break;
+ case FD6_GROUP_PROG:
+ fd6_emit_add_group(emit, prog->config_stateobj, FD6_GROUP_PROG_CONFIG,
+ ENABLE_ALL);
+ fd6_emit_add_group(emit, prog->stateobj, FD6_GROUP_PROG, ENABLE_DRAW);
+ fd6_emit_add_group(emit, prog->binning_stateobj,
+ FD6_GROUP_PROG_BINNING,
+ CP_SET_DRAW_STATE__0_BINNING);
+
+ /* emit remaining streaming program state, ie. what depends on
+ * other emit state, so cannot be pre-baked.
+ */
+ fd6_emit_take_group(emit, fd6_program_interp_state(emit),
+ FD6_GROUP_PROG_INTERP, ENABLE_DRAW);
+ continue;
+ case FD6_GROUP_RASTERIZER:
+ state = fd6_rasterizer_state(ctx, emit->primitive_restart);
+ fd_ringbuffer_ref(state);
+ break;
+ case FD6_GROUP_PROG_FB_RAST:
+ state = build_prog_fb_rast(emit);
+ break;
+ case FD6_GROUP_BLEND:
+ state = fd6_blend_variant(ctx->blend, pfb->samples, ctx->sample_mask)
+ ->stateobj;
+ fd_ringbuffer_ref(state);
+ break;
+ case FD6_GROUP_BLEND_COLOR:
+ state = build_blend_color(emit);
+ break;
+ case FD6_GROUP_IBO:
+ state = build_ibo(emit);
+ fd6_emit_ibo_consts(emit, fs, PIPE_SHADER_FRAGMENT, ring);
+ break;
+ case FD6_GROUP_CONST:
+ state = fd6_build_user_consts(emit);
+ break;
+ case FD6_GROUP_VS_DRIVER_PARAMS:
+ state = fd6_build_vs_driver_params(emit);
+ break;
+ case FD6_GROUP_PRIMITIVE_PARAMS:
+ state = fd6_build_tess_consts(emit);
+ break;
+ case FD6_GROUP_VS_TEX:
+ needs_border |=
+ fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vs);
+ continue;
+ case FD6_GROUP_HS_TEX:
+ if (hs) {
+ needs_border |= fd6_emit_combined_textures(
+ ring, emit, PIPE_SHADER_TESS_CTRL, hs);
+ }
+ continue;
+ case FD6_GROUP_DS_TEX:
+ if (ds) {
+ needs_border |= fd6_emit_combined_textures(
+ ring, emit, PIPE_SHADER_TESS_EVAL, ds);
+ }
+ continue;
+ case FD6_GROUP_GS_TEX:
+ if (gs) {
+ needs_border |=
+ fd6_emit_combined_textures(ring, emit, PIPE_SHADER_GEOMETRY, gs);
+ }
+ continue;
+ case FD6_GROUP_FS_TEX:
+ needs_border |=
+ fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fs);
+ continue;
+ case FD6_GROUP_SO:
+ fd6_emit_streamout(ring, emit);
+ continue;
+ case FD6_GROUP_NON_GROUP:
+ fd6_emit_non_ring(ring, emit);
+ continue;
+ default:
+ unreachable("bad state group");
+ }
+
+ fd6_emit_take_group(emit, state, group, enable_mask);
+ }
+
+ if (needs_border)
+ emit_border_color(ctx, ring);
+
+ if (emit->num_groups > 0) {
+ OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups);
+ for (unsigned i = 0; i < emit->num_groups; i++) {
+ struct fd6_state_group *g = &emit->groups[i];
+ unsigned n = g->stateobj ? fd_ringbuffer_size(g->stateobj) / 4 : 0;
+
+ debug_assert((g->enable_mask & ~ENABLE_ALL) == 0);
+
+ if (n == 0) {
+ OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
+ CP_SET_DRAW_STATE__0_DISABLE | g->enable_mask |
+ CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ } else {
+ OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(n) | g->enable_mask |
+ CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
+ OUT_RB(ring, g->stateobj);
+ }
+
+ if (g->stateobj)
+ fd_ringbuffer_del(g->stateobj);
+ }
+ emit->num_groups = 0;
+ }
}
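For reference, each state group costs exactly three dwords inside the single
CP_SET_DRAW_STATE packet opened above: a header carrying the dword count, the
enable mask and the group id, followed by the lo/hi halves of the state object
address (zeros for a disable entry). A minimal sketch of one such entry,
assuming the caller already has the 64-bit iova of a baked stateobj (the
helper name and the iova parameter are hypothetical, not part of this patch):

static void
sketch_one_draw_state_entry(struct fd_ringbuffer *ring, unsigned count,
                            uint32_t enable_mask, unsigned group_id,
                            uint64_t iova)
{
   /* header: count == 0 together with DISABLE turns the group off */
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(count) | enable_mask |
                     CP_SET_DRAW_STATE__0_GROUP_ID(group_id));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO((uint32_t)iova));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI((uint32_t)(iova >> 32)));
}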
void
fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct ir3_shader_variant *cp)
+ struct ir3_shader_variant *cp)
{
- enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];
+ enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];
- if (dirty & (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
- FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) {
- struct fd_texture_stateobj *tex = &ctx->tex[PIPE_SHADER_COMPUTE];
- unsigned bcolor_offset = fd6_border_color_offset(ctx, PIPE_SHADER_COMPUTE, tex);
+ if (dirty & (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
+ FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) {
+ struct fd_texture_stateobj *tex = &ctx->tex[PIPE_SHADER_COMPUTE];
+ unsigned bcolor_offset =
+ fd6_border_color_offset(ctx, PIPE_SHADER_COMPUTE, tex);
- bool needs_border = fd6_emit_textures(ctx, ring, PIPE_SHADER_COMPUTE, tex,
- bcolor_offset, cp);
+ bool needs_border = fd6_emit_textures(ctx, ring, PIPE_SHADER_COMPUTE, tex,
+ bcolor_offset, cp);
- if (needs_border)
- emit_border_color(ctx, ring);
+ if (needs_border)
+ emit_border_color(ctx, ring);
- OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
+ OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1);
+ OUT_RING(ring, 0);
- OUT_PKT4(ring, REG_A6XX_SP_HS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
+ OUT_PKT4(ring, REG_A6XX_SP_HS_TEX_COUNT, 1);
+ OUT_RING(ring, 0);
- OUT_PKT4(ring, REG_A6XX_SP_DS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
+ OUT_PKT4(ring, REG_A6XX_SP_DS_TEX_COUNT, 1);
+ OUT_RING(ring, 0);
- OUT_PKT4(ring, REG_A6XX_SP_GS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
+ OUT_PKT4(ring, REG_A6XX_SP_GS_TEX_COUNT, 1);
+ OUT_RING(ring, 0);
- OUT_PKT4(ring, REG_A6XX_SP_FS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
- }
+ OUT_PKT4(ring, REG_A6XX_SP_FS_TEX_COUNT, 1);
+ OUT_RING(ring, 0);
+ }
- if (dirty & (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) {
- struct fd_ringbuffer *state =
- fd6_build_ibo_state(ctx, cp, PIPE_SHADER_COMPUTE);
+ if (dirty & (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) {
+ struct fd_ringbuffer *state =
+ fd6_build_ibo_state(ctx, cp, PIPE_SHADER_COMPUTE);
- OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3);
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
- CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) |
- CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(cp)));
- OUT_RB(ring, state);
+ OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3);
+ OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) |
+ CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(cp)));
+ OUT_RB(ring, state);
- OUT_PKT4(ring, REG_A6XX_SP_CS_IBO, 2);
- OUT_RB(ring, state);
+ OUT_PKT4(ring, REG_A6XX_SP_CS_IBO, 2);
+ OUT_RB(ring, state);
- OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1);
- OUT_RING(ring, ir3_shader_nibo(cp));
+ OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1);
+ OUT_RING(ring, ir3_shader_nibo(cp));
- fd_ringbuffer_del(state);
- }
+ fd_ringbuffer_del(state);
+ }
}
-
/* emit setup at the beginning of a new cmdstream buffer (don't rely on
 * previous state, there could have been a context switch between ioctls):
*/
void
fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- //struct fd_context *ctx = batch->ctx;
-
- if (!batch->nondraw) {
- trace_start_state_restore(&batch->trace);
- }
-
- fd6_cache_inv(batch, ring);
-
- OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
- .vs_state = true,
- .hs_state = true,
- .ds_state = true,
- .gs_state = true,
- .fs_state = true,
- .cs_state = true,
- .gfx_ibo = true,
- .cs_ibo = true,
- .gfx_shared_const = true,
- .cs_shared_const = true,
- .gfx_bindless = 0x1f,
- .cs_bindless = 0x1f
- ));
-
- OUT_WFI5(ring);
-
- WRITE(REG_A6XX_RB_UNKNOWN_8E04, 0x0);
- WRITE(REG_A6XX_SP_FLOAT_CNTL, A6XX_SP_FLOAT_CNTL_F16_NO_INF);
- WRITE(REG_A6XX_SP_UNKNOWN_AE00, 0);
- WRITE(REG_A6XX_SP_PERFCTR_ENABLE, 0x3f);
- WRITE(REG_A6XX_TPL1_UNKNOWN_B605, 0x44);
- WRITE(REG_A6XX_TPL1_UNKNOWN_B600, 0x100000);
- WRITE(REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
- WRITE(REG_A6XX_HLSQ_UNKNOWN_BE01, 0);
-
- WRITE(REG_A6XX_VPC_UNKNOWN_9600, 0);
- WRITE(REG_A6XX_GRAS_UNKNOWN_8600, 0x880);
- WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0x80000);
- WRITE(REG_A6XX_SP_UNKNOWN_AE03, 0x1430);
- WRITE(REG_A6XX_SP_IBO_COUNT, 0);
- WRITE(REG_A6XX_SP_UNKNOWN_B182, 0);
- WRITE(REG_A6XX_HLSQ_SHARED_CONSTS, 0);
- WRITE(REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
- WRITE(REG_A6XX_UCHE_CLIENT_PF, 4);
- WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x1);
- WRITE(REG_A6XX_SP_MODE_CONTROL, A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4);
- WRITE(REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX);
- WRITE(REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
- WRITE(REG_A6XX_PC_MODE_CNTL, 0x1f);
-
- WRITE(REG_A6XX_GRAS_UNKNOWN_8101, 0);
- WRITE(REG_A6XX_GRAS_SAMPLE_CNTL, 0);
- WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0x2);
-
- WRITE(REG_A6XX_RB_UNKNOWN_8818, 0);
- WRITE(REG_A6XX_RB_UNKNOWN_8819, 0);
- WRITE(REG_A6XX_RB_UNKNOWN_881A, 0);
- WRITE(REG_A6XX_RB_UNKNOWN_881B, 0);
- WRITE(REG_A6XX_RB_UNKNOWN_881C, 0);
- WRITE(REG_A6XX_RB_UNKNOWN_881D, 0);
- WRITE(REG_A6XX_RB_UNKNOWN_881E, 0);
- WRITE(REG_A6XX_RB_UNKNOWN_88F0, 0);
-
- WRITE(REG_A6XX_VPC_POINT_COORD_INVERT,
- A6XX_VPC_POINT_COORD_INVERT(0).value);
- WRITE(REG_A6XX_VPC_UNKNOWN_9300, 0);
-
- WRITE(REG_A6XX_VPC_SO_DISABLE, A6XX_VPC_SO_DISABLE(true).value);
-
- WRITE(REG_A6XX_PC_RASTER_CNTL, 0);
-
- WRITE(REG_A6XX_PC_MULTIVIEW_CNTL, 0);
-
- WRITE(REG_A6XX_SP_UNKNOWN_B183, 0);
-
- WRITE(REG_A6XX_GRAS_UNKNOWN_8099, 0);
- WRITE(REG_A6XX_GRAS_VS_LAYER_CNTL, 0);
- WRITE(REG_A6XX_GRAS_UNKNOWN_80A0, 2);
- WRITE(REG_A6XX_GRAS_UNKNOWN_80AF, 0);
- WRITE(REG_A6XX_VPC_UNKNOWN_9210, 0);
- WRITE(REG_A6XX_VPC_UNKNOWN_9211, 0);
- WRITE(REG_A6XX_VPC_UNKNOWN_9602, 0);
- WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0);
- WRITE(REG_A6XX_SP_TP_SAMPLE_CONFIG, 0);
- /* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_UNKNOWN_B309
- * but this seems to kill texture gather offsets.
- */
- WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0xa2);
- WRITE(REG_A6XX_RB_SAMPLE_CONFIG, 0);
- WRITE(REG_A6XX_GRAS_SAMPLE_CONFIG, 0);
- WRITE(REG_A6XX_RB_Z_BOUNDS_MIN, 0);
- WRITE(REG_A6XX_RB_Z_BOUNDS_MAX, 0);
- WRITE(REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);
-
- emit_marker6(ring, 7);
-
- OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* VFD_MODE_CNTL */
-
- WRITE(REG_A6XX_VFD_MULTIVIEW_CNTL, 0);
-
- OUT_PKT4(ring, REG_A6XX_PC_MODE_CNTL, 1);
- OUT_RING(ring, 0x0000001f); /* PC_MODE_CNTL */
-
- /* Clear any potential pending state groups to be safe: */
- OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
- OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
- CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
- CP_SET_DRAW_STATE__0_GROUP_ID(0));
- OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
- OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
-
- OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_CNTL, 1);
- OUT_RING(ring, 0x00000000); /* VPC_SO_STREAM_CNTL */
-
- OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
- OUT_RING(ring, 0x00000000);
-
- if (!batch->nondraw) {
- trace_end_state_restore(&batch->trace);
- }
+ // struct fd_context *ctx = batch->ctx;
+
+ if (!batch->nondraw) {
+ trace_start_state_restore(&batch->trace);
+ }
+
+ fd6_cache_inv(batch, ring);
+
+ OUT_REG(ring,
+ A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
+ .ds_state = true, .gs_state = true,
+ .fs_state = true, .cs_state = true,
+ .gfx_ibo = true, .cs_ibo = true,
+ .gfx_shared_const = true,
+ .cs_shared_const = true,
+ .gfx_bindless = 0x1f, .cs_bindless = 0x1f));
+
+ OUT_WFI5(ring);
+
+ WRITE(REG_A6XX_RB_UNKNOWN_8E04, 0x0);
+ WRITE(REG_A6XX_SP_FLOAT_CNTL, A6XX_SP_FLOAT_CNTL_F16_NO_INF);
+ WRITE(REG_A6XX_SP_UNKNOWN_AE00, 0);
+ WRITE(REG_A6XX_SP_PERFCTR_ENABLE, 0x3f);
+ WRITE(REG_A6XX_TPL1_UNKNOWN_B605, 0x44);
+ WRITE(REG_A6XX_TPL1_UNKNOWN_B600, 0x100000);
+ WRITE(REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
+ WRITE(REG_A6XX_HLSQ_UNKNOWN_BE01, 0);
+
+ WRITE(REG_A6XX_VPC_UNKNOWN_9600, 0);
+ WRITE(REG_A6XX_GRAS_UNKNOWN_8600, 0x880);
+ WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0x80000);
+ WRITE(REG_A6XX_SP_UNKNOWN_AE03, 0x1430);
+ WRITE(REG_A6XX_SP_IBO_COUNT, 0);
+ WRITE(REG_A6XX_SP_UNKNOWN_B182, 0);
+ WRITE(REG_A6XX_HLSQ_SHARED_CONSTS, 0);
+ WRITE(REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
+ WRITE(REG_A6XX_UCHE_CLIENT_PF, 4);
+ WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x1);
+ WRITE(REG_A6XX_SP_MODE_CONTROL,
+ A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4);
+ WRITE(REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX);
+ WRITE(REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
+ WRITE(REG_A6XX_PC_MODE_CNTL, 0x1f);
+
+ WRITE(REG_A6XX_GRAS_UNKNOWN_8101, 0);
+ WRITE(REG_A6XX_GRAS_SAMPLE_CNTL, 0);
+ WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0x2);
+
+ WRITE(REG_A6XX_RB_UNKNOWN_8818, 0);
+ WRITE(REG_A6XX_RB_UNKNOWN_8819, 0);
+ WRITE(REG_A6XX_RB_UNKNOWN_881A, 0);
+ WRITE(REG_A6XX_RB_UNKNOWN_881B, 0);
+ WRITE(REG_A6XX_RB_UNKNOWN_881C, 0);
+ WRITE(REG_A6XX_RB_UNKNOWN_881D, 0);
+ WRITE(REG_A6XX_RB_UNKNOWN_881E, 0);
+ WRITE(REG_A6XX_RB_UNKNOWN_88F0, 0);
+
+ WRITE(REG_A6XX_VPC_POINT_COORD_INVERT, A6XX_VPC_POINT_COORD_INVERT(0).value);
+ WRITE(REG_A6XX_VPC_UNKNOWN_9300, 0);
+
+ WRITE(REG_A6XX_VPC_SO_DISABLE, A6XX_VPC_SO_DISABLE(true).value);
+
+ WRITE(REG_A6XX_PC_RASTER_CNTL, 0);
+
+ WRITE(REG_A6XX_PC_MULTIVIEW_CNTL, 0);
+
+ WRITE(REG_A6XX_SP_UNKNOWN_B183, 0);
+
+ WRITE(REG_A6XX_GRAS_UNKNOWN_8099, 0);
+ WRITE(REG_A6XX_GRAS_VS_LAYER_CNTL, 0);
+ WRITE(REG_A6XX_GRAS_UNKNOWN_80A0, 2);
+ WRITE(REG_A6XX_GRAS_UNKNOWN_80AF, 0);
+ WRITE(REG_A6XX_VPC_UNKNOWN_9210, 0);
+ WRITE(REG_A6XX_VPC_UNKNOWN_9211, 0);
+ WRITE(REG_A6XX_VPC_UNKNOWN_9602, 0);
+ WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0);
+ WRITE(REG_A6XX_SP_TP_SAMPLE_CONFIG, 0);
+ /* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_UNKNOWN_B309
+ * but this seems to kill texture gather offsets.
+ */
+ WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0xa2);
+ WRITE(REG_A6XX_RB_SAMPLE_CONFIG, 0);
+ WRITE(REG_A6XX_GRAS_SAMPLE_CONFIG, 0);
+ WRITE(REG_A6XX_RB_Z_BOUNDS_MIN, 0);
+ WRITE(REG_A6XX_RB_Z_BOUNDS_MAX, 0);
+ WRITE(REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);
+
+ emit_marker6(ring, 7);
+
+ OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* VFD_MODE_CNTL */
+
+ WRITE(REG_A6XX_VFD_MULTIVIEW_CNTL, 0);
+
+ OUT_PKT4(ring, REG_A6XX_PC_MODE_CNTL, 1);
+ OUT_RING(ring, 0x0000001f); /* PC_MODE_CNTL */
+
+ /* Clear any potential pending state groups to be safe: */
+ OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+ OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
+ CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+ CP_SET_DRAW_STATE__0_GROUP_ID(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
+
+ OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* VPC_SO_STREAM_CNTL */
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ if (!batch->nondraw) {
+ trace_end_state_restore(&batch->trace);
+ }
}
static void
fd6_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
- unsigned dst_off, struct pipe_resource *src, unsigned src_off,
- unsigned sizedwords)
+ unsigned dst_off, struct pipe_resource *src, unsigned src_off,
+ unsigned sizedwords)
{
- struct fd_bo *src_bo = fd_resource(src)->bo;
- struct fd_bo *dst_bo = fd_resource(dst)->bo;
- unsigned i;
-
- for (i = 0; i < sizedwords; i++) {
- OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
- OUT_RING(ring, 0x00000000);
- OUT_RELOC(ring, dst_bo, dst_off, 0, 0);
- OUT_RELOC(ring, src_bo, src_off, 0, 0);
-
- dst_off += 4;
- src_off += 4;
- }
+ struct fd_bo *src_bo = fd_resource(src)->bo;
+ struct fd_bo *dst_bo = fd_resource(dst)->bo;
+ unsigned i;
+
+ for (i = 0; i < sizedwords; i++) {
+ OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
+ OUT_RING(ring, 0x00000000);
+ OUT_RELOC(ring, dst_bo, dst_off, 0, 0);
+ OUT_RELOC(ring, src_bo, src_off, 0, 0);
+
+ dst_off += 4;
+ src_off += 4;
+ }
}
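Note that the offsets are in bytes while the size is in dwords, hence both
offsets advance by 4 per copied dword. A hypothetical call (the resource
names are placeholders), copying four dwords between two resources:

   /* hypothetical usage: copy 4 dwords (16 bytes) from src to dst */
   fd6_mem_to_mem(ring, dst_prsc, 0 /* dst_off */, src_prsc,
                  0 /* src_off */, 4 /* sizedwords */);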
/* this is *almost* the same as fd6_cache_flush().. which I guess
 * could be re-worked into something a bit more generic, although that
 * would mean figuring out which events trigger what state to flush..
 */
static void
-fd6_framebuffer_barrier(struct fd_context *ctx)
- assert_dt
+fd6_framebuffer_barrier(struct fd_context *ctx) assert_dt
{
- struct fd6_context *fd6_ctx = fd6_context(ctx);
- struct fd_batch *batch = ctx->batch;
- struct fd_ringbuffer *ring = batch->draw;
- unsigned seqno;
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct fd_batch *batch = ctx->batch;
+ struct fd_ringbuffer *ring = batch->draw;
+ unsigned seqno;
- seqno = fd6_event_write(batch, ring, RB_DONE_TS, true);
+ seqno = fd6_event_write(batch, ring, RB_DONE_TS, true);
- OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
- OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
- CP_WAIT_REG_MEM_0_POLL_MEMORY);
- OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
- OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno));
- OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0));
- OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
+ OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
+ OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
+ CP_WAIT_REG_MEM_0_POLL_MEMORY);
+ OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
+ OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno));
+ OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0));
+ OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
- fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
- fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
- seqno = fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
+ seqno = fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
- fd6_event_write(batch, ring, 0x31, false);
+ fd6_event_write(batch, ring, 0x31, false);
- OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
- OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0));
- OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
- OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno));
+ OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
+ OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0));
+ OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
+ OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno));
}
void
fd6_emit_init_screen(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
- screen->emit_ib = fd6_emit_ib;
- screen->mem_to_mem = fd6_mem_to_mem;
+ struct fd_screen *screen = fd_screen(pscreen);
+ screen->emit_ib = fd6_emit_ib;
+ screen->mem_to_mem = fd6_mem_to_mem;
}
void
-fd6_emit_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd6_emit_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->framebuffer_barrier = fd6_framebuffer_barrier;
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->framebuffer_barrier = fd6_framebuffer_barrier;
}
#include "pipe/p_context.h"
-#include "freedreno_context.h"
#include "fd6_context.h"
#include "fd6_format.h"
#include "fd6_program.h"
+#include "freedreno_context.h"
#include "ir3_gallium.h"
struct fd_ringbuffer;
/* To collect all the state objects to emit in a single CP_SET_DRAW_STATE
 * packet, the emit tracks a collection of however-many state groups that
 * need to be emit'd.
 */
enum fd6_state_id {
- FD6_GROUP_PROG_CONFIG,
- FD6_GROUP_PROG,
- FD6_GROUP_PROG_BINNING,
- FD6_GROUP_PROG_INTERP,
- FD6_GROUP_PROG_FB_RAST,
- FD6_GROUP_LRZ,
- FD6_GROUP_LRZ_BINNING,
- FD6_GROUP_VTXSTATE,
- FD6_GROUP_VBO,
- FD6_GROUP_CONST,
- FD6_GROUP_VS_DRIVER_PARAMS,
- FD6_GROUP_PRIMITIVE_PARAMS,
- FD6_GROUP_VS_TEX,
- FD6_GROUP_HS_TEX,
- FD6_GROUP_DS_TEX,
- FD6_GROUP_GS_TEX,
- FD6_GROUP_FS_TEX,
- FD6_GROUP_RASTERIZER,
- FD6_GROUP_ZSA,
- FD6_GROUP_BLEND,
- FD6_GROUP_SCISSOR,
- FD6_GROUP_BLEND_COLOR,
- FD6_GROUP_SO,
- FD6_GROUP_IBO,
- FD6_GROUP_NON_GROUP, /* placeholder group for state emit in IB2, keep last */
+ FD6_GROUP_PROG_CONFIG,
+ FD6_GROUP_PROG,
+ FD6_GROUP_PROG_BINNING,
+ FD6_GROUP_PROG_INTERP,
+ FD6_GROUP_PROG_FB_RAST,
+ FD6_GROUP_LRZ,
+ FD6_GROUP_LRZ_BINNING,
+ FD6_GROUP_VTXSTATE,
+ FD6_GROUP_VBO,
+ FD6_GROUP_CONST,
+ FD6_GROUP_VS_DRIVER_PARAMS,
+ FD6_GROUP_PRIMITIVE_PARAMS,
+ FD6_GROUP_VS_TEX,
+ FD6_GROUP_HS_TEX,
+ FD6_GROUP_DS_TEX,
+ FD6_GROUP_GS_TEX,
+ FD6_GROUP_FS_TEX,
+ FD6_GROUP_RASTERIZER,
+ FD6_GROUP_ZSA,
+ FD6_GROUP_BLEND,
+ FD6_GROUP_SCISSOR,
+ FD6_GROUP_BLEND_COLOR,
+ FD6_GROUP_SO,
+ FD6_GROUP_IBO,
+ FD6_GROUP_NON_GROUP, /* placeholder group for state emit in IB2, keep last */
};
-#define ENABLE_ALL (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM)
+#define ENABLE_ALL \
+ (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_GMEM | \
+ CP_SET_DRAW_STATE__0_SYSMEM)
#define ENABLE_DRAW (CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM)
struct fd6_state_group {
- struct fd_ringbuffer *stateobj;
- enum fd6_state_id group_id;
- /* enable_mask controls which states the stateobj is evaluated in,
- * b0 is binning pass b1 and/or b2 is draw pass
- */
- uint32_t enable_mask;
+ struct fd_ringbuffer *stateobj;
+ enum fd6_state_id group_id;
+   /* enable_mask controls which states the stateobj is evaluated in:
+    * b0 is binning pass, b1 and/or b2 is draw pass
+    */
+ uint32_t enable_mask;
};
/* grouped together emit-state for prog/vertex/state emit: */
struct fd6_emit {
- struct fd_context *ctx;
- const struct fd_vertex_state *vtx;
- const struct pipe_draw_info *info;
- const struct pipe_draw_indirect_info *indirect;
- const struct pipe_draw_start_count *draw;
- struct ir3_cache_key key;
- enum fd_dirty_3d_state dirty;
- uint32_t dirty_groups;
-
- uint32_t sprite_coord_enable; /* bitmask */
- bool sprite_coord_mode;
- bool rasterflat;
- bool primitive_restart;
-
- /* cached to avoid repeated lookups: */
- const struct fd6_program_state *prog;
-
- struct ir3_shader_variant *bs;
- struct ir3_shader_variant *vs;
- struct ir3_shader_variant *hs;
- struct ir3_shader_variant *ds;
- struct ir3_shader_variant *gs;
- struct ir3_shader_variant *fs;
-
- unsigned streamout_mask;
-
- struct fd6_state_group groups[32];
- unsigned num_groups;
+ struct fd_context *ctx;
+ const struct fd_vertex_state *vtx;
+ const struct pipe_draw_info *info;
+ const struct pipe_draw_indirect_info *indirect;
+ const struct pipe_draw_start_count *draw;
+ struct ir3_cache_key key;
+ enum fd_dirty_3d_state dirty;
+ uint32_t dirty_groups;
+
+ uint32_t sprite_coord_enable; /* bitmask */
+ bool sprite_coord_mode;
+ bool rasterflat;
+ bool primitive_restart;
+
+ /* cached to avoid repeated lookups: */
+ const struct fd6_program_state *prog;
+
+ struct ir3_shader_variant *bs;
+ struct ir3_shader_variant *vs;
+ struct ir3_shader_variant *hs;
+ struct ir3_shader_variant *ds;
+ struct ir3_shader_variant *gs;
+ struct ir3_shader_variant *fs;
+
+ unsigned streamout_mask;
+
+ struct fd6_state_group groups[32];
+ unsigned num_groups;
};
static inline const struct fd6_program_state *
fd6_emit_get_prog(struct fd6_emit *emit)
{
- if (!emit->prog) {
- struct ir3_program_state *s =
- ir3_cache_lookup(emit->ctx->shader_cache, &emit->key, &emit->ctx->debug);
- emit->prog = fd6_program_state(s);
- }
- return emit->prog;
+ if (!emit->prog) {
+ struct ir3_program_state *s = ir3_cache_lookup(
+ emit->ctx->shader_cache, &emit->key, &emit->ctx->debug);
+ emit->prog = fd6_program_state(s);
+ }
+ return emit->prog;
}
static inline void
fd6_emit_take_group(struct fd6_emit *emit, struct fd_ringbuffer *stateobj,
- enum fd6_state_id group_id, unsigned enable_mask)
+ enum fd6_state_id group_id, unsigned enable_mask)
{
- debug_assert(emit->num_groups < ARRAY_SIZE(emit->groups));
- struct fd6_state_group *g = &emit->groups[emit->num_groups++];
- g->stateobj = stateobj;
- g->group_id = group_id;
- g->enable_mask = enable_mask;
+ debug_assert(emit->num_groups < ARRAY_SIZE(emit->groups));
+ struct fd6_state_group *g = &emit->groups[emit->num_groups++];
+ g->stateobj = stateobj;
+ g->group_id = group_id;
+ g->enable_mask = enable_mask;
}
static inline void
fd6_emit_add_group(struct fd6_emit *emit, struct fd_ringbuffer *stateobj,
- enum fd6_state_id group_id, unsigned enable_mask)
+ enum fd6_state_id group_id, unsigned enable_mask)
{
- fd6_emit_take_group(emit, fd_ringbuffer_ref(stateobj), group_id, enable_mask);
+ fd6_emit_take_group(emit, fd_ringbuffer_ref(stateobj), group_id,
+ enable_mask);
}
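The difference between the two helpers is reference ownership: the take
variant consumes the caller's reference to the stateobj, while the add
variant grabs a reference of its own. A usage sketch, based on the calls in
fd6_emit_state() above:

   /* built on the fly, reference handed over to the emit: */
   fd6_emit_take_group(emit, build_scissor(emit), FD6_GROUP_SCISSOR,
                       ENABLE_ALL);

   /* long-lived pre-baked stateobj, emit takes its own reference: */
   fd6_emit_add_group(emit, prog->stateobj, FD6_GROUP_PROG, ENABLE_DRAW);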
static inline unsigned
fd6_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
- enum vgt_event_type evt, bool timestamp)
+ enum vgt_event_type evt, bool timestamp)
{
- unsigned seqno = 0;
+ unsigned seqno = 0;
- fd_reset_wfi(batch);
+ fd_reset_wfi(batch);
- OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
- OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
- if (timestamp) {
- struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
- seqno = ++fd6_ctx->seqno;
- OUT_RELOC(ring, control_ptr(fd6_ctx, seqno)); /* ADDR_LO/HI */
- OUT_RING(ring, seqno);
- }
+ OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
+ OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
+ if (timestamp) {
+ struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
+ seqno = ++fd6_ctx->seqno;
+ OUT_RELOC(ring, control_ptr(fd6_ctx, seqno)); /* ADDR_LO/HI */
+ OUT_RING(ring, seqno);
+ }
- return seqno;
+ return seqno;
}
static inline void
fd6_cache_inv(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- fd6_event_write(batch, ring, CACHE_INVALIDATE, false);
+ fd6_event_write(batch, ring, CACHE_INVALIDATE, false);
}
static inline void
fd6_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
- unsigned seqno;
+ struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
+ unsigned seqno;
- seqno = fd6_event_write(batch, ring, RB_DONE_TS, true);
+ seqno = fd6_event_write(batch, ring, RB_DONE_TS, true);
- OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
- OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
- CP_WAIT_REG_MEM_0_POLL_MEMORY);
- OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
- OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno));
- OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0));
- OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
+ OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
+ OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
+ CP_WAIT_REG_MEM_0_POLL_MEMORY);
+ OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
+ OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno));
+ OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0));
+ OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
- seqno = fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
+ seqno = fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
- OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
- OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0));
- OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
- OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno));
+ OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
+ OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0));
+ OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
+ OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno));
}
static inline void
fd6_emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- emit_marker6(ring, 7);
- fd6_event_write(batch, ring, BLIT, false);
- emit_marker6(ring, 7);
+ emit_marker6(ring, 7);
+ fd6_event_write(batch, ring, BLIT, false);
+ emit_marker6(ring, 7);
}
static inline void
fd6_emit_lrz_flush(struct fd_ringbuffer *ring)
{
- OUT_PKT7(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, LRZ_FLUSH);
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, LRZ_FLUSH);
}
static inline bool
fd6_geom_stage(gl_shader_stage type)
{
- switch (type) {
- case MESA_SHADER_VERTEX:
- case MESA_SHADER_TESS_CTRL:
- case MESA_SHADER_TESS_EVAL:
- case MESA_SHADER_GEOMETRY:
- return true;
- case MESA_SHADER_FRAGMENT:
- case MESA_SHADER_COMPUTE:
- case MESA_SHADER_KERNEL:
- return false;
- default:
- unreachable("bad shader type");
- }
+ switch (type) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ return true;
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_KERNEL:
+ return false;
+ default:
+ unreachable("bad shader type");
+ }
}
static inline uint32_t
fd6_stage2opcode(gl_shader_stage type)
{
- return fd6_geom_stage(type) ? CP_LOAD_STATE6_GEOM : CP_LOAD_STATE6_FRAG;
+ return fd6_geom_stage(type) ? CP_LOAD_STATE6_GEOM : CP_LOAD_STATE6_FRAG;
}
static inline enum a6xx_state_block
fd6_stage2shadersb(gl_shader_stage type)
{
- switch (type) {
- case MESA_SHADER_VERTEX:
- return SB6_VS_SHADER;
- case MESA_SHADER_TESS_CTRL:
- return SB6_HS_SHADER;
- case MESA_SHADER_TESS_EVAL:
- return SB6_DS_SHADER;
- case MESA_SHADER_GEOMETRY:
- return SB6_GS_SHADER;
- case MESA_SHADER_FRAGMENT:
- return SB6_FS_SHADER;
- case MESA_SHADER_COMPUTE:
- case MESA_SHADER_KERNEL:
- return SB6_CS_SHADER;
- default:
- unreachable("bad shader type");
- return ~0;
- }
+ switch (type) {
+ case MESA_SHADER_VERTEX:
+ return SB6_VS_SHADER;
+ case MESA_SHADER_TESS_CTRL:
+ return SB6_HS_SHADER;
+ case MESA_SHADER_TESS_EVAL:
+ return SB6_DS_SHADER;
+ case MESA_SHADER_GEOMETRY:
+ return SB6_GS_SHADER;
+ case MESA_SHADER_FRAGMENT:
+ return SB6_FS_SHADER;
+ case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_KERNEL:
+ return SB6_CS_SHADER;
+ default:
+ unreachable("bad shader type");
+ return ~0;
+ }
}
static inline enum a6xx_tess_spacing
fd6_gl2spacing(enum gl_tess_spacing spacing)
{
- switch (spacing) {
- case TESS_SPACING_EQUAL:
- return TESS_EQUAL;
- case TESS_SPACING_FRACTIONAL_ODD:
- return TESS_FRACTIONAL_ODD;
- case TESS_SPACING_FRACTIONAL_EVEN:
- return TESS_FRACTIONAL_EVEN;
- case TESS_SPACING_UNSPECIFIED:
- default:
- unreachable("spacing must be specified");
- }
+ switch (spacing) {
+ case TESS_SPACING_EQUAL:
+ return TESS_EQUAL;
+ case TESS_SPACING_FRACTIONAL_ODD:
+ return TESS_FRACTIONAL_ODD;
+ case TESS_SPACING_FRACTIONAL_EVEN:
+ return TESS_FRACTIONAL_EVEN;
+ case TESS_SPACING_UNSPECIFIED:
+ default:
+ unreachable("spacing must be specified");
+ }
}
bool fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum pipe_shader_type type, struct fd_texture_stateobj *tex,
- unsigned bcolor_offset,
- const struct ir3_shader_variant *v) assert_dt;
+ enum pipe_shader_type type,
+ struct fd_texture_stateobj *tex, unsigned bcolor_offset,
+ const struct ir3_shader_variant *v) assert_dt;
-void fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt;
+void fd6_emit_state(struct fd_ringbuffer *ring,
+ struct fd6_emit *emit) assert_dt;
void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct ir3_shader_variant *cp) assert_dt;
+ struct ir3_shader_variant *cp) assert_dt;
void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);
static inline void
fd6_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
- emit_marker6(ring, 6);
- __OUT_IB5(ring, target);
- emit_marker6(ring, 6);
+ emit_marker6(ring, 6);
+ __OUT_IB5(ring, target);
+ emit_marker6(ring, 6);
}
-#define WRITE(reg, val) do { \
- OUT_PKT4(ring, reg, 1); \
- OUT_RING(ring, val); \
- } while (0)
-
+#define WRITE(reg, val) \
+ do { \
+ OUT_PKT4(ring, reg, 1); \
+ OUT_RING(ring, val); \
+ } while (0)
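The do/while (0) wrapper keeps the macro usable as a single statement, e.g.
inside an un-braced if. For illustration, WRITE(REG_A6XX_PC_MODE_CNTL, 0x1f)
expands to roughly:

   do {
      OUT_PKT4(ring, REG_A6XX_PC_MODE_CNTL, 1);
      OUT_RING(ring, 0x1f);
   } while (0);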
#endif /* FD6_EMIT_H */
#include "fd6_format.h"
#include "freedreno_resource.h"
-
/* Specifies the table of all the formats and their features. Also supplies
* the helpers that look up various data in those tables.
*/
struct fd6_format {
- enum a6xx_format vtx;
- enum a6xx_format tex;
- enum a6xx_format rb;
- enum a3xx_color_swap swap;
- boolean present;
+ enum a6xx_format vtx;
+ enum a6xx_format tex;
+ enum a6xx_format rb;
+ enum a3xx_color_swap swap;
+ boolean present;
};
-#define FMT(pipe, vtxfmt, texfmt, rbfmt, swapfmt) \
- [PIPE_FORMAT_ ## pipe] = { \
- .present = 1, \
- .vtx = FMT6_ ## vtxfmt, \
- .tex = FMT6_ ## texfmt, \
- .rb = FMT6_ ## rbfmt, \
- .swap = swapfmt \
- }
+#define FMT(pipe, vtxfmt, texfmt, rbfmt, swapfmt) \
+ [PIPE_FORMAT_##pipe] = {.present = 1, \
+ .vtx = FMT6_##vtxfmt, \
+ .tex = FMT6_##texfmt, \
+ .rb = FMT6_##rbfmt, \
+ .swap = swapfmt}
/* vertex + texture + color */
#define VTC(pipe, fmt, swapfmt) FMT(pipe, fmt, fmt, fmt, swapfmt)
enum a6xx_format
fd6_pipe2vtx(enum pipe_format format)
{
- if (!formats[format].present)
- return FMT6_NONE;
- return formats[format].vtx;
+ if (!formats[format].present)
+ return FMT6_NONE;
+ return formats[format].vtx;
}
/* convert pipe format to texture sampler format: */
enum a6xx_format
fd6_pipe2tex(enum pipe_format format)
{
- if (!formats[format].present)
- return FMT6_NONE;
- return formats[format].tex;
+ if (!formats[format].present)
+ return FMT6_NONE;
+ return formats[format].tex;
}
/* convert pipe format to MRT / copydest format used for render-target: */
enum a6xx_format
fd6_pipe2color(enum pipe_format format)
{
- if (!formats[format].present)
- return FMT6_NONE;
- return formats[format].rb;
+ if (!formats[format].present)
+ return FMT6_NONE;
+ return formats[format].rb;
}
enum a3xx_color_swap
fd6_pipe2swap(enum pipe_format format)
{
- if (!formats[format].present)
- return WZYX;
- return formats[format].swap;
+ if (!formats[format].present)
+ return WZYX;
+ return formats[format].swap;
}
enum a6xx_depth_format
fd6_pipe2depth(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_Z16_UNORM:
- return DEPTH6_16;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- case PIPE_FORMAT_X8Z24_UNORM:
- case PIPE_FORMAT_S8_UINT_Z24_UNORM:
- return DEPTH6_24_8;
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- return DEPTH6_32;
- default:
- return ~0;
- }
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ return DEPTH6_16;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ return DEPTH6_24_8;
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return DEPTH6_32;
+ default:
+ return ~0;
+ }
}
enum a6xx_tex_swiz
fd6_pipe2swiz(unsigned swiz)
{
- switch (swiz) {
- default:
- case PIPE_SWIZZLE_X: return A6XX_TEX_X;
- case PIPE_SWIZZLE_Y: return A6XX_TEX_Y;
- case PIPE_SWIZZLE_Z: return A6XX_TEX_Z;
- case PIPE_SWIZZLE_W: return A6XX_TEX_W;
- case PIPE_SWIZZLE_0: return A6XX_TEX_ZERO;
- case PIPE_SWIZZLE_1: return A6XX_TEX_ONE;
- }
+ switch (swiz) {
+ default:
+ case PIPE_SWIZZLE_X:
+ return A6XX_TEX_X;
+ case PIPE_SWIZZLE_Y:
+ return A6XX_TEX_Y;
+ case PIPE_SWIZZLE_Z:
+ return A6XX_TEX_Z;
+ case PIPE_SWIZZLE_W:
+ return A6XX_TEX_W;
+ case PIPE_SWIZZLE_0:
+ return A6XX_TEX_ZERO;
+ case PIPE_SWIZZLE_1:
+ return A6XX_TEX_ONE;
+ }
}
void
-fd6_tex_swiz(enum pipe_format format, unsigned char *swiz,
- unsigned swizzle_r, unsigned swizzle_g,
- unsigned swizzle_b, unsigned swizzle_a)
+fd6_tex_swiz(enum pipe_format format, unsigned char *swiz, unsigned swizzle_r,
+ unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a)
{
- const struct util_format_description *desc =
- util_format_description(format);
- const unsigned char uswiz[4] = {
- swizzle_r, swizzle_g, swizzle_b, swizzle_a
- };
-
- /* Gallium expects stencil sampler to return (s,s,s,s), so massage
- * the swizzle to do so.
- */
- if (format == PIPE_FORMAT_X24S8_UINT) {
- const unsigned char stencil_swiz[4] = {
- PIPE_SWIZZLE_W, PIPE_SWIZZLE_W, PIPE_SWIZZLE_W, PIPE_SWIZZLE_W
- };
- util_format_compose_swizzles(stencil_swiz, uswiz, swiz);
- } else if (fd6_pipe2swap(format) != WZYX) {
- /* Formats with a non-pass-through swap are permutations of RGBA
- * formats. We program the permutation using the swap and don't
- * need to compose the format swizzle with the user swizzle.
- */
- memcpy(swiz, uswiz, sizeof(uswiz));
- } else {
- /* Otherwise, it's an unswapped RGBA format or a format like L8 where
- * we need the XXX1 swizzle from the gallium format description.
- */
- util_format_compose_swizzles(desc->swizzle, uswiz, swiz);
- }
+ const struct util_format_description *desc = util_format_description(format);
+ const unsigned char uswiz[4] = {swizzle_r, swizzle_g, swizzle_b, swizzle_a};
+
+ /* Gallium expects stencil sampler to return (s,s,s,s), so massage
+ * the swizzle to do so.
+ */
+ if (format == PIPE_FORMAT_X24S8_UINT) {
+ const unsigned char stencil_swiz[4] = {PIPE_SWIZZLE_W, PIPE_SWIZZLE_W,
+ PIPE_SWIZZLE_W, PIPE_SWIZZLE_W};
+ util_format_compose_swizzles(stencil_swiz, uswiz, swiz);
+ } else if (fd6_pipe2swap(format) != WZYX) {
+ /* Formats with a non-pass-through swap are permutations of RGBA
+ * formats. We program the permutation using the swap and don't
+ * need to compose the format swizzle with the user swizzle.
+ */
+ memcpy(swiz, uswiz, sizeof(uswiz));
+ } else {
+ /* Otherwise, it's an unswapped RGBA format or a format like L8 where
+ * we need the XXX1 swizzle from the gallium format description.
+ */
+ util_format_compose_swizzles(desc->swizzle, uswiz, swiz);
+ }
}
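As a worked example of the stencil special case above (assumed values, not
from the patch): an identity user swizzle on an X24S8 view composes to
all-W, so every channel returns stencil:

   unsigned char swiz[4];
   fd6_tex_swiz(PIPE_FORMAT_X24S8_UINT, swiz, PIPE_SWIZZLE_X,
                PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);
   /* swiz is now { W, W, W, W } -> the sampler returns (s,s,s,s) */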
/* Compute the TEX_CONST_0 value for texture state, including SWIZ/SWAP/etc: */
uint32_t
-fd6_tex_const_0(struct pipe_resource *prsc,
- unsigned level, enum pipe_format format,
- unsigned swizzle_r, unsigned swizzle_g,
- unsigned swizzle_b, unsigned swizzle_a)
+fd6_tex_const_0(struct pipe_resource *prsc, unsigned level,
+ enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
+ unsigned swizzle_b, unsigned swizzle_a)
{
- struct fd_resource *rsc = fd_resource(prsc);
- unsigned char swiz[4];
-
- fd6_tex_swiz(format, swiz,
- swizzle_r, swizzle_g,
- swizzle_b, swizzle_a);
-
- return
- A6XX_TEX_CONST_0_FMT(fd6_pipe2tex(format)) |
- A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
- A6XX_TEX_CONST_0_SWAP(fd6_resource_swap(rsc, format)) |
- A6XX_TEX_CONST_0_TILE_MODE(fd_resource_tile_mode(prsc, level)) |
- COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
- A6XX_TEX_CONST_0_SWIZ_X(fd6_pipe2swiz(swiz[0])) |
- A6XX_TEX_CONST_0_SWIZ_Y(fd6_pipe2swiz(swiz[1])) |
- A6XX_TEX_CONST_0_SWIZ_Z(fd6_pipe2swiz(swiz[2])) |
- A6XX_TEX_CONST_0_SWIZ_W(fd6_pipe2swiz(swiz[3]));
+ struct fd_resource *rsc = fd_resource(prsc);
+ unsigned char swiz[4];
+
+ fd6_tex_swiz(format, swiz, swizzle_r, swizzle_g, swizzle_b, swizzle_a);
+
+ return A6XX_TEX_CONST_0_FMT(fd6_pipe2tex(format)) |
+ A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
+ A6XX_TEX_CONST_0_SWAP(fd6_resource_swap(rsc, format)) |
+ A6XX_TEX_CONST_0_TILE_MODE(fd_resource_tile_mode(prsc, level)) |
+ COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
+ A6XX_TEX_CONST_0_SWIZ_X(fd6_pipe2swiz(swiz[0])) |
+ A6XX_TEX_CONST_0_SWIZ_Y(fd6_pipe2swiz(swiz[1])) |
+ A6XX_TEX_CONST_0_SWIZ_Z(fd6_pipe2swiz(swiz[2])) |
+ A6XX_TEX_CONST_0_SWIZ_W(fd6_pipe2swiz(swiz[3]));
}
enum a6xx_tex_swiz fd6_pipe2swiz(unsigned swiz);
void fd6_tex_swiz(enum pipe_format format, unsigned char *swiz,
- unsigned swizzle_r, unsigned swizzle_g,
- unsigned swizzle_b, unsigned swizzle_a);
+ unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b,
+ unsigned swizzle_a);
-uint32_t fd6_tex_const_0(struct pipe_resource *prsc,
- unsigned level, enum pipe_format format,
- unsigned swizzle_r, unsigned swizzle_g,
- unsigned swizzle_b, unsigned swizzle_a);
+uint32_t fd6_tex_const_0(struct pipe_resource *prsc, unsigned level,
+ enum pipe_format format, unsigned swizzle_r,
+ unsigned swizzle_g, unsigned swizzle_b,
+ unsigned swizzle_a);
static inline uint32_t
fd6_resource_swap(struct fd_resource *rsc, enum pipe_format format)
{
- return rsc->layout.tile_mode ? WZYX : fd6_pipe2swap(format);
+ return rsc->layout.tile_mode ? WZYX : fd6_pipe2swap(format);
}
#endif /* FD6_UTIL_H_ */
#include <stdio.h>
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "freedreno_draw.h"
-#include "freedreno_state.h"
#include "freedreno_resource.h"
+#include "freedreno_state.h"
#include "freedreno_tracepoints.h"
#include "fd6_blitter.h"
-#include "fd6_gmem.h"
#include "fd6_context.h"
#include "fd6_draw.h"
#include "fd6_emit.h"
-#include "fd6_program.h"
#include "fd6_format.h"
+#include "fd6_gmem.h"
+#include "fd6_pack.h"
+#include "fd6_program.h"
#include "fd6_resource.h"
#include "fd6_zsa.h"
-#include "fd6_pack.h"
/**
 * Emits the flags registers, suitable for RB_MRT_FLAG_BUFFER,
 * RB_DEPTH_FLAG_BUFFER, SP_PS_2D_SRC_FLAGS, and RB_BLIT_FLAG_DST.
 */
void
fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,
- int level, int layer)
+ int level, int layer)
{
- if (fd_resource_ubwc_enabled(rsc, level)) {
- OUT_RELOC(ring, rsc->bo, fd_resource_ubwc_offset(rsc, level, layer), 0, 0);
- OUT_RING(ring,
- A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(fdl_ubwc_pitch(&rsc->layout, level)) |
- A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2));
- } else {
- OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
- OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
- OUT_RING(ring, 0x00000000);
- }
+ if (fd_resource_ubwc_enabled(rsc, level)) {
+ OUT_RELOC(ring, rsc->bo, fd_resource_ubwc_offset(rsc, level, layer), 0,
+ 0);
+ OUT_RING(ring, A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(
+ fdl_ubwc_pitch(&rsc->layout, level)) |
+ A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(
+ rsc->layout.ubwc_layer_size >> 2));
+ } else {
+ OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
+ OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
+ OUT_RING(ring, 0x00000000);
+ }
}
static void
emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,
- const struct fd_gmem_stateobj *gmem)
+ const struct fd_gmem_stateobj *gmem)
{
- unsigned srgb_cntl = 0;
- unsigned i;
-
- unsigned max_layer_index = 0;
-
- for (i = 0; i < pfb->nr_cbufs; i++) {
- enum a6xx_format format = 0;
- enum a3xx_color_swap swap = WZYX;
- bool sint = false, uint = false;
- struct fd_resource *rsc = NULL;
- struct fdl_slice *slice = NULL;
- uint32_t stride = 0;
- uint32_t array_stride = 0;
- uint32_t offset;
- uint32_t tile_mode;
-
- if (!pfb->cbufs[i])
- continue;
-
- struct pipe_surface *psurf = pfb->cbufs[i];
- enum pipe_format pformat = psurf->format;
- rsc = fd_resource(psurf->texture);
- if (!rsc->bo)
- continue;
-
- uint32_t base = gmem ? gmem->cbuf_base[i] : 0;
- slice = fd_resource_slice(rsc, psurf->u.tex.level);
- format = fd6_pipe2color(pformat);
- sint = util_format_is_pure_sint(pformat);
- uint = util_format_is_pure_uint(pformat);
-
- if (util_format_is_srgb(pformat))
- srgb_cntl |= (1 << i);
-
- offset = fd_resource_offset(rsc, psurf->u.tex.level,
- psurf->u.tex.first_layer);
-
- stride = fd_resource_pitch(rsc, psurf->u.tex.level);
- array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
- swap = fd6_resource_swap(rsc, pformat);
-
- tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
- max_layer_index = psurf->u.tex.last_layer - psurf->u.tex.first_layer;
-
- debug_assert((offset + slice->size0) <= fd_bo_size(rsc->bo));
-
- OUT_REG(ring,
- A6XX_RB_MRT_BUF_INFO(i,
- .color_format = format,
- .color_tile_mode = tile_mode,
- .color_swap = swap),
- A6XX_RB_MRT_PITCH(i, .a6xx_rb_mrt_pitch = stride),
- A6XX_RB_MRT_ARRAY_PITCH(i, .a6xx_rb_mrt_array_pitch = array_stride),
- A6XX_RB_MRT_BASE(i, .bo = rsc->bo, .bo_offset = offset),
- A6XX_RB_MRT_BASE_GMEM(i, .unknown = base));
-
- OUT_REG(ring,
- A6XX_SP_FS_MRT_REG(i, .color_format = format,
- .color_sint = sint, .color_uint = uint));
-
- OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
- fd6_emit_flag_reference(ring, rsc,
- psurf->u.tex.level, psurf->u.tex.first_layer);
- }
-
- OUT_REG(ring, A6XX_RB_SRGB_CNTL(.dword = srgb_cntl));
- OUT_REG(ring, A6XX_SP_SRGB_CNTL(.dword = srgb_cntl));
-
- OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index));
+ unsigned srgb_cntl = 0;
+ unsigned i;
+
+ unsigned max_layer_index = 0;
+
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ enum a6xx_format format = 0;
+ enum a3xx_color_swap swap = WZYX;
+ bool sint = false, uint = false;
+ struct fd_resource *rsc = NULL;
+ struct fdl_slice *slice = NULL;
+ uint32_t stride = 0;
+ uint32_t array_stride = 0;
+ uint32_t offset;
+ uint32_t tile_mode;
+
+ if (!pfb->cbufs[i])
+ continue;
+
+ struct pipe_surface *psurf = pfb->cbufs[i];
+ enum pipe_format pformat = psurf->format;
+ rsc = fd_resource(psurf->texture);
+ if (!rsc->bo)
+ continue;
+
+ uint32_t base = gmem ? gmem->cbuf_base[i] : 0;
+ slice = fd_resource_slice(rsc, psurf->u.tex.level);
+ format = fd6_pipe2color(pformat);
+ sint = util_format_is_pure_sint(pformat);
+ uint = util_format_is_pure_uint(pformat);
+
+ if (util_format_is_srgb(pformat))
+ srgb_cntl |= (1 << i);
+
+ offset =
+ fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
+
+ stride = fd_resource_pitch(rsc, psurf->u.tex.level);
+ array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
+ swap = fd6_resource_swap(rsc, pformat);
+
+ tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
+ max_layer_index = psurf->u.tex.last_layer - psurf->u.tex.first_layer;
+
+ debug_assert((offset + slice->size0) <= fd_bo_size(rsc->bo));
+
+ OUT_REG(
+ ring,
+ A6XX_RB_MRT_BUF_INFO(i, .color_format = format,
+ .color_tile_mode = tile_mode, .color_swap = swap),
+ A6XX_RB_MRT_PITCH(i, .a6xx_rb_mrt_pitch = stride),
+ A6XX_RB_MRT_ARRAY_PITCH(i, .a6xx_rb_mrt_array_pitch = array_stride),
+ A6XX_RB_MRT_BASE(i, .bo = rsc->bo, .bo_offset = offset),
+ A6XX_RB_MRT_BASE_GMEM(i, .unknown = base));
+
+ OUT_REG(ring, A6XX_SP_FS_MRT_REG(i, .color_format = format,
+ .color_sint = sint, .color_uint = uint));
+
+ OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
+ fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
+ psurf->u.tex.first_layer);
+ }
+
+ OUT_REG(ring, A6XX_RB_SRGB_CNTL(.dword = srgb_cntl));
+ OUT_REG(ring, A6XX_SP_SRGB_CNTL(.dword = srgb_cntl));
+
+ OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index));
}
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
- const struct fd_gmem_stateobj *gmem)
+ const struct fd_gmem_stateobj *gmem)
{
- if (zsbuf) {
- struct fd_resource *rsc = fd_resource(zsbuf->texture);
- enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format);
- uint32_t stride = fd_resource_pitch(rsc, 0);
- uint32_t array_stride = fd_resource_layer_stride(rsc, 0);
- uint32_t base = gmem ? gmem->zsbuf_base[0] : 0;
- uint32_t offset = fd_resource_offset(rsc, zsbuf->u.tex.level,
- zsbuf->u.tex.first_layer);
-
- OUT_REG(ring,
- A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt),
- A6XX_RB_DEPTH_BUFFER_PITCH(.a6xx_rb_depth_buffer_pitch = stride),
- A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(.a6xx_rb_depth_buffer_array_pitch = array_stride),
- A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),
- A6XX_RB_DEPTH_BUFFER_BASE_GMEM(.dword = base));
-
- OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));
-
- OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, 3);
- fd6_emit_flag_reference(ring, rsc,
- zsbuf->u.tex.level, zsbuf->u.tex.first_layer);
-
- if (rsc->lrz) {
- OUT_REG(ring,
- A6XX_GRAS_LRZ_BUFFER_BASE(.bo = rsc->lrz),
- A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = rsc->lrz_pitch),
- // XXX a6xx seems to use a different buffer here.. not sure what for..
- A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
- } else {
- OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
- OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
- OUT_RING(ring, 0x00000000);
- }
-
- /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAZ_LRZ_BUFFER_BASE
- * plus this CP_EVENT_WRITE at the end in it's own IB..
- */
- OUT_PKT7(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25));
-
- if (rsc->stencil) {
- stride = fd_resource_pitch(rsc->stencil, 0);
- array_stride = fd_resource_layer_stride(rsc->stencil, 0);
- uint32_t base = gmem ? gmem->zsbuf_base[1] : 0;
-
- OUT_REG(ring,
- A6XX_RB_STENCIL_INFO(.separate_stencil = true),
- A6XX_RB_STENCIL_BUFFER_PITCH(.a6xx_rb_stencil_buffer_pitch = stride),
- A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(.a6xx_rb_stencil_buffer_array_pitch = array_stride),
- A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo),
- A6XX_RB_STENCIL_BUFFER_BASE_GMEM(.dword = base));
- } else {
- OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
- }
- } else {
- OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
- OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */
-
- OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));
-
- OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
- OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
- OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
- OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
- OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */
-
- OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
- }
+ if (zsbuf) {
+ struct fd_resource *rsc = fd_resource(zsbuf->texture);
+ enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format);
+ uint32_t stride = fd_resource_pitch(rsc, 0);
+ uint32_t array_stride = fd_resource_layer_stride(rsc, 0);
+ uint32_t base = gmem ? gmem->zsbuf_base[0] : 0;
+ uint32_t offset =
+ fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);
+
+ OUT_REG(
+ ring, A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt),
+ A6XX_RB_DEPTH_BUFFER_PITCH(.a6xx_rb_depth_buffer_pitch = stride),
+ A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(.a6xx_rb_depth_buffer_array_pitch =
+ array_stride),
+ A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),
+ A6XX_RB_DEPTH_BUFFER_BASE_GMEM(.dword = base));
+
+ OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));
+
+ OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, 3);
+ fd6_emit_flag_reference(ring, rsc, zsbuf->u.tex.level,
+ zsbuf->u.tex.first_layer);
+
+ if (rsc->lrz) {
+ OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(.bo = rsc->lrz),
+ A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = rsc->lrz_pitch),
+ // XXX a6xx seems to use a different buffer here.. not sure
+ // what for..
+ A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
+ } else {
+ OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
+ OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
+ OUT_RING(ring, 0x00000000);
+ }
+
+      /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAS_LRZ_BUFFER_BASE
+       * plus this CP_EVENT_WRITE at the end in its own IB..
+       */
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25));
+
+ if (rsc->stencil) {
+ stride = fd_resource_pitch(rsc->stencil, 0);
+ array_stride = fd_resource_layer_stride(rsc->stencil, 0);
+ uint32_t base = gmem ? gmem->zsbuf_base[1] : 0;
+
+ OUT_REG(ring, A6XX_RB_STENCIL_INFO(.separate_stencil = true),
+ A6XX_RB_STENCIL_BUFFER_PITCH(.a6xx_rb_stencil_buffer_pitch =
+ stride),
+ A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(
+ .a6xx_rb_stencil_buffer_array_pitch = array_stride),
+ A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo),
+ A6XX_RB_STENCIL_BUFFER_BASE_GMEM(.dword = base));
+ } else {
+ OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
+ }
+ } else {
+ OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
+ OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */
+
+ OUT_REG(ring,
+ A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
+ OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
+ OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
+ OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
+ OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */
+
+ OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
+ }
}
static bool
use_hw_binning(struct fd_batch *batch)
{
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- if ((gmem->maxpw * gmem->maxph) > 32)
- return false;
+ if ((gmem->maxpw * gmem->maxph) > 32)
+ return false;
- return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) >= 2) &&
- (batch->num_draws > 0);
+ return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) >= 2) &&
+ (batch->num_draws > 0);
}
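A worked example with assumed numbers: a 4x4 bin grid using 8x4 VSC pipes
stays within the 32-pipe budget, so hw binning is used as soon as the batch
has any draws:

   /* assumed values, for illustration:
    *   gmem->maxpw = 8, gmem->maxph = 4   ->  8 * 4 = 32, not > 32: ok
    *   gmem->nbins_x = 4, gmem->nbins_y = 4  ->  16 bins >= 2: ok
    *   batch->num_draws = 10  ->  > 0: ok
    * => use_hw_binning() returns true (assuming fd_binning_enabled)
    */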
static void
patch_fb_read(struct fd_batch *batch)
{
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
-
- for (unsigned i = 0; i < fd_patch_num_elements(&batch->fb_read_patches); i++) {
- struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
- *patch->cs = patch->val | A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]);
- }
- util_dynarray_clear(&batch->fb_read_patches);
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+
+ for (unsigned i = 0; i < fd_patch_num_elements(&batch->fb_read_patches);
+ i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
+ *patch->cs =
+ patch->val | A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]);
+ }
+ util_dynarray_clear(&batch->fb_read_patches);
}
static void
-update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb, bool binning)
+update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,
+ bool binning)
{
- struct fd_ringbuffer *ring = batch->gmem;
- uint32_t cntl = 0;
- bool depth_ubwc_enable = false;
- uint32_t mrts_ubwc_enable = 0;
- int i;
-
- if (pfb->zsbuf) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
- depth_ubwc_enable = fd_resource_ubwc_enabled(rsc, pfb->zsbuf->u.tex.level);
- }
-
- for (i = 0; i < pfb->nr_cbufs; i++) {
- if (!pfb->cbufs[i])
- continue;
-
- struct pipe_surface *psurf = pfb->cbufs[i];
- struct fd_resource *rsc = fd_resource(psurf->texture);
- if (!rsc->bo)
- continue;
-
- if (fd_resource_ubwc_enabled(rsc, psurf->u.tex.level))
- mrts_ubwc_enable |= 1 << i;
- }
-
- cntl |= A6XX_RB_RENDER_CNTL_UNK4;
- if (binning)
- cntl |= A6XX_RB_RENDER_CNTL_BINNING;
-
- OUT_PKT7(ring, CP_REG_WRITE, 3);
- OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));
- OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL);
- OUT_RING(ring, cntl |
- COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) |
- A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable));
+ struct fd_ringbuffer *ring = batch->gmem;
+ uint32_t cntl = 0;
+ bool depth_ubwc_enable = false;
+ uint32_t mrts_ubwc_enable = 0;
+ int i;
+
+ if (pfb->zsbuf) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ depth_ubwc_enable =
+ fd_resource_ubwc_enabled(rsc, pfb->zsbuf->u.tex.level);
+ }
+
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ if (!pfb->cbufs[i])
+ continue;
+
+ struct pipe_surface *psurf = pfb->cbufs[i];
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+ if (!rsc->bo)
+ continue;
+
+ if (fd_resource_ubwc_enabled(rsc, psurf->u.tex.level))
+ mrts_ubwc_enable |= 1 << i;
+ }
+
+ cntl |= A6XX_RB_RENDER_CNTL_UNK4;
+ if (binning)
+ cntl |= A6XX_RB_RENDER_CNTL_BINNING;
+
+ OUT_PKT7(ring, CP_REG_WRITE, 3);
+ OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));
+ OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL);
+ OUT_RING(ring, cntl |
+ COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) |
+ A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable));
}
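/* Illustration (invented values): with UBWC enabled on MRT0 and MRT2
 * only, the loop above yields mrts_ubwc_enable = (1 << 0) | (1 << 2)
 * = 0x5, packed into A6XX_RB_RENDER_CNTL_FLAG_MRTS(0x5); a compressed
 * depth buffer is signalled separately via the FLAG_DEPTH bit.
 */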
/* extra size to store VSC_DRAW_STRM_SIZE: */
-#define VSC_DRAW_STRM_SIZE(pitch) ((pitch) * 32 + 0x100)
-#define VSC_PRIM_STRM_SIZE(pitch) ((pitch) * 32)
+#define VSC_DRAW_STRM_SIZE(pitch) ((pitch)*32 + 0x100)
+#define VSC_PRIM_STRM_SIZE(pitch) ((pitch)*32)
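/* Worked example (illustration only): with a pitch of 0x4000 (the
 * alignment used on realloc below),
 *   VSC_DRAW_STRM_SIZE(0x4000) = 0x4000 * 32 + 0x100 = 0x80100 bytes
 *   VSC_PRIM_STRM_SIZE(0x4000) = 0x4000 * 32         = 0x80000 bytes
 * i.e. one pitch-sized stream per VSC pipe (32 pipes), with the draw
 * stream followed by room for the per-pipe VSC_DRAW_STRM_SIZE counters
 * (hence the 32 * pitch bo_offset on VSC_DRAW_STRM_SIZE_ADDRESS below).
 */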
static void
update_vsc_pipe(struct fd_batch *batch)
{
- struct fd_context *ctx = batch->ctx;
- struct fd6_context *fd6_ctx = fd6_context(ctx);
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd_ringbuffer *ring = batch->gmem;
- int i;
-
- if (batch->draw_strm_bits/8 > fd6_ctx->vsc_draw_strm_pitch) {
- if (fd6_ctx->vsc_draw_strm)
- fd_bo_del(fd6_ctx->vsc_draw_strm);
- fd6_ctx->vsc_draw_strm = NULL;
- /* Note: probably only need to align to 0x40, but aligning stronger
- * reduces the odds that we will have to realloc again on the next
- * frame:
- */
- fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits/8, 0x4000);
- mesa_logd("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x",
- fd6_ctx->vsc_draw_strm_pitch);
- }
-
- if (batch->prim_strm_bits/8 > fd6_ctx->vsc_prim_strm_pitch) {
- if (fd6_ctx->vsc_prim_strm)
- fd_bo_del(fd6_ctx->vsc_prim_strm);
- fd6_ctx->vsc_prim_strm = NULL;
- fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits/8, 0x4000);
- mesa_logd("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x",
- fd6_ctx->vsc_prim_strm_pitch);
- }
-
- if (!fd6_ctx->vsc_draw_strm) {
- fd6_ctx->vsc_draw_strm = fd_bo_new(ctx->screen->dev,
- VSC_DRAW_STRM_SIZE(fd6_ctx->vsc_draw_strm_pitch),
- DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_draw_strm");
- }
-
- if (!fd6_ctx->vsc_prim_strm) {
- fd6_ctx->vsc_prim_strm = fd_bo_new(ctx->screen->dev,
- VSC_PRIM_STRM_SIZE(fd6_ctx->vsc_prim_strm_pitch),
- DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_prim_strm");
- }
-
- OUT_REG(ring,
- A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
- A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(
- .bo = fd6_ctx->vsc_draw_strm,
- .bo_offset = 32 * fd6_ctx->vsc_draw_strm_pitch));
-
- OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x,
- .ny = gmem->nbins_y));
-
- OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
- for (i = 0; i < 32; i++) {
- const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
- OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
- A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
- A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
- A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
- }
-
- OUT_REG(ring,
- A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),
- A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),
- A6XX_VSC_PRIM_STRM_LIMIT(.dword = fd6_ctx->vsc_prim_strm_pitch - 64));
-
- OUT_REG(ring,
- A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),
- A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),
- A6XX_VSC_DRAW_STRM_LIMIT(.dword = fd6_ctx->vsc_draw_strm_pitch - 64));
+ struct fd_context *ctx = batch->ctx;
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_ringbuffer *ring = batch->gmem;
+ int i;
+
+ if (batch->draw_strm_bits / 8 > fd6_ctx->vsc_draw_strm_pitch) {
+ if (fd6_ctx->vsc_draw_strm)
+ fd_bo_del(fd6_ctx->vsc_draw_strm);
+ fd6_ctx->vsc_draw_strm = NULL;
+ /* Note: we probably only need to align to 0x40, but aligning to a
+ * larger boundary reduces the odds that we will have to realloc
+ * again on the next frame:
+ */
+ fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits / 8, 0x4000);
+ mesa_logd("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x",
+ fd6_ctx->vsc_draw_strm_pitch);
+ }
+
+ if (batch->prim_strm_bits / 8 > fd6_ctx->vsc_prim_strm_pitch) {
+ if (fd6_ctx->vsc_prim_strm)
+ fd_bo_del(fd6_ctx->vsc_prim_strm);
+ fd6_ctx->vsc_prim_strm = NULL;
+ fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits / 8, 0x4000);
+ mesa_logd("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x",
+ fd6_ctx->vsc_prim_strm_pitch);
+ }
+
+ if (!fd6_ctx->vsc_draw_strm) {
+ fd6_ctx->vsc_draw_strm = fd_bo_new(
+ ctx->screen->dev, VSC_DRAW_STRM_SIZE(fd6_ctx->vsc_draw_strm_pitch),
+ DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_draw_strm");
+ }
+
+ if (!fd6_ctx->vsc_prim_strm) {
+ fd6_ctx->vsc_prim_strm = fd_bo_new(
+ ctx->screen->dev, VSC_PRIM_STRM_SIZE(fd6_ctx->vsc_prim_strm_pitch),
+ DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_prim_strm");
+ }
+
+ OUT_REG(
+ ring, A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
+ A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = fd6_ctx->vsc_draw_strm,
+ .bo_offset =
+ 32 * fd6_ctx->vsc_draw_strm_pitch));
+
+ OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x, .ny = gmem->nbins_y));
+
+ OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
+ for (i = 0; i < 32; i++) {
+ const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
+ OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
+ A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
+ A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
+ A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
+ }
+
+ OUT_REG(
+ ring, A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),
+ A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),
+ A6XX_VSC_PRIM_STRM_LIMIT(.dword = fd6_ctx->vsc_prim_strm_pitch - 64));
+
+ OUT_REG(
+ ring, A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),
+ A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),
+ A6XX_VSC_DRAW_STRM_LIMIT(.dword = fd6_ctx->vsc_draw_strm_pitch - 64));
}
static void
emit_vsc_overflow_test(struct fd_batch *batch)
{
- struct fd_ringbuffer *ring = batch->gmem;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
-
- debug_assert((fd6_ctx->vsc_draw_strm_pitch & 0x3) == 0);
- debug_assert((fd6_ctx->vsc_prim_strm_pitch & 0x3) == 0);
-
- /* Check for overflow, write vsc_scratch if detected: */
- for (int i = 0; i < gmem->num_vsc_pipes; i++) {
- OUT_PKT7(ring, CP_COND_WRITE5, 8);
- OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
- CP_COND_WRITE5_0_WRITE_MEMORY);
- OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
- OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
- OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch - 64));
- OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
- OUT_RELOC(ring, control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
- OUT_RING(ring, CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));
-
- OUT_PKT7(ring, CP_COND_WRITE5, 8);
- OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
- CP_COND_WRITE5_0_WRITE_MEMORY);
- OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
- OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
- OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch - 64));
- OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
- OUT_RELOC(ring, control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
- OUT_RING(ring, CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));
- }
-
- OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
+ struct fd_ringbuffer *ring = batch->gmem;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
+
+ debug_assert((fd6_ctx->vsc_draw_strm_pitch & 0x3) == 0);
+ debug_assert((fd6_ctx->vsc_prim_strm_pitch & 0x3) == 0);
+
+ /* Check for overflow, write vsc_scratch if detected: */
+ for (int i = 0; i < gmem->num_vsc_pipes; i++) {
+ OUT_PKT7(ring, CP_COND_WRITE5, 8);
+ OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
+ CP_COND_WRITE5_0_WRITE_MEMORY);
+ OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
+ REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
+ OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
+ OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch - 64));
+ OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
+ OUT_RELOC(ring,
+ control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
+ OUT_RING(ring,
+ CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));
+
+ OUT_PKT7(ring, CP_COND_WRITE5, 8);
+ OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
+ CP_COND_WRITE5_0_WRITE_MEMORY);
+ OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
+ REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
+ OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
+ OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch - 64));
+ OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
+ OUT_RELOC(ring,
+ control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
+ OUT_RING(ring,
+ CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));
+ }
+
+ OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
}
static void
check_vsc_overflow(struct fd_context *ctx)
{
- struct fd6_context *fd6_ctx = fd6_context(ctx);
- struct fd6_control *control = fd_bo_map(fd6_ctx->control_mem);
- uint32_t vsc_overflow = control->vsc_overflow;
-
- if (!vsc_overflow)
- return;
-
- /* clear overflow flag: */
- control->vsc_overflow = 0;
-
- unsigned buffer = vsc_overflow & 0x3;
- unsigned size = vsc_overflow & ~0x3;
-
- if (buffer == 0x1) {
- /* VSC_DRAW_STRM overflow: */
-
- if (size < fd6_ctx->vsc_draw_strm_pitch) {
- /* we've already increased the size, this overflow is
- * from a batch submitted before resize, but executed
- * after
- */
- return;
- }
-
- fd_bo_del(fd6_ctx->vsc_draw_strm);
- fd6_ctx->vsc_draw_strm = NULL;
- fd6_ctx->vsc_draw_strm_pitch *= 2;
-
- mesa_logd("resized VSC_DRAW_STRM_PITCH to: 0x%x",
- fd6_ctx->vsc_draw_strm_pitch);
-
- } else if (buffer == 0x3) {
- /* VSC_PRIM_STRM overflow: */
-
- if (size < fd6_ctx->vsc_prim_strm_pitch) {
- /* we've already increased the size */
- return;
- }
-
- fd_bo_del(fd6_ctx->vsc_prim_strm);
- fd6_ctx->vsc_prim_strm = NULL;
- fd6_ctx->vsc_prim_strm_pitch *= 2;
-
- mesa_logd("resized VSC_PRIM_STRM_PITCH to: 0x%x",
- fd6_ctx->vsc_prim_strm_pitch);
-
- } else {
- /* NOTE: it's possible, for example, for overflow to corrupt the
- * control page. I mostly just see this hit if I set initial VSC
- * buffer size extremely small. Things still seem to recover,
- * but maybe we should pre-emptively realloc vsc_data/vsc_data2
- * and hope for different memory placement?
- */
- mesa_loge("invalid vsc_overflow value: 0x%08x", vsc_overflow);
- }
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct fd6_control *control = fd_bo_map(fd6_ctx->control_mem);
+ uint32_t vsc_overflow = control->vsc_overflow;
+
+ if (!vsc_overflow)
+ return;
+
+ /* clear overflow flag: */
+ control->vsc_overflow = 0;
+
+ unsigned buffer = vsc_overflow & 0x3;
+ unsigned size = vsc_overflow & ~0x3;
+
+ if (buffer == 0x1) {
+ /* VSC_DRAW_STRM overflow: */
+
+ if (size < fd6_ctx->vsc_draw_strm_pitch) {
+ /* we've already increased the size, this overflow is
+ * from a batch submitted before resize, but executed
+ * after
+ */
+ return;
+ }
+
+ fd_bo_del(fd6_ctx->vsc_draw_strm);
+ fd6_ctx->vsc_draw_strm = NULL;
+ fd6_ctx->vsc_draw_strm_pitch *= 2;
+
+ mesa_logd("resized VSC_DRAW_STRM_PITCH to: 0x%x",
+ fd6_ctx->vsc_draw_strm_pitch);
+
+ } else if (buffer == 0x3) {
+ /* VSC_PRIM_STRM overflow: */
+
+ if (size < fd6_ctx->vsc_prim_strm_pitch) {
+ /* we've already increased the size */
+ return;
+ }
+
+ fd_bo_del(fd6_ctx->vsc_prim_strm);
+ fd6_ctx->vsc_prim_strm = NULL;
+ fd6_ctx->vsc_prim_strm_pitch *= 2;
+
+ mesa_logd("resized VSC_PRIM_STRM_PITCH to: 0x%x",
+ fd6_ctx->vsc_prim_strm_pitch);
+
+ } else {
+ /* NOTE: it's possible, for example, for overflow to corrupt the
+ * control page. I mostly just see this hit if I set initial VSC
+ * buffer size extremely small. Things still seem to recover,
+ * but maybe we should pre-emptively realloc vsc_data/vsc_data2
+ * and hope for different memory placement?
+ */
+ mesa_loge("invalid vsc_overflow value: 0x%08x", vsc_overflow);
+ }
}
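/* A minimal sketch (hypothetical helper, not part of the driver) of the
 * handshake between emit_vsc_overflow_test() and check_vsc_overflow()
 * above: CP_COND_WRITE5 writes "tag + pitch" into the control page, and
 * since pitches are 0x4000-aligned the low two bits carry the tag:
 */
static inline void
decode_vsc_overflow_sketch(uint32_t vsc_overflow)
{
   unsigned buffer = vsc_overflow & 0x3; /* 0x1: draw strm, 0x3: prim strm */
   unsigned size = vsc_overflow & ~0x3;  /* pitch at the time of overflow */
   /* e.g. vsc_overflow == 1 + 0x8000 -> buffer == 0x1, size == 0x8000 */
   (void)buffer;
   (void)size;
}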
static void
emit_common_init(struct fd_batch *batch)
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct fd_autotune *at = &batch->ctx->autotune;
- struct fd_batch_result *result = batch->autotune_result;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_autotune *at = &batch->ctx->autotune;
+ struct fd_batch_result *result = batch->autotune_result;
- if (!result)
- return;
+ if (!result)
+ return;
- OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
- OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
+ OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
+ OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
- OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
- OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_start));
+ OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
+ OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_start));
- fd6_event_write(batch, ring, ZPASS_DONE, false);
+ fd6_event_write(batch, ring, ZPASS_DONE, false);
}
static void
emit_common_fini(struct fd_batch *batch)
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct fd_autotune *at = &batch->ctx->autotune;
- struct fd_batch_result *result = batch->autotune_result;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_autotune *at = &batch->ctx->autotune;
+ struct fd_batch_result *result = batch->autotune_result;
- if (!result)
- return;
+ if (!result)
+ return;
- OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
- OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
+ OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
+ OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
- OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
- OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_end));
+ OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
+ OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_end));
- fd6_event_write(batch, ring, ZPASS_DONE, false);
+ fd6_event_write(batch, ring, ZPASS_DONE, false);
- // TODO is there a better event to use.. a single ZPASS_DONE_TS would be nice
- OUT_PKT7(ring, CP_EVENT_WRITE, 4);
- OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
- OUT_RELOC(ring, results_ptr(at, fence));
- OUT_RING(ring, result->fence);
+ // TODO is there a better event to use.. a single ZPASS_DONE_TS would be nice
+ OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+ OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
+ OUT_RELOC(ring, results_ptr(at, fence));
+ OUT_RING(ring, result->fence);
}
static void
emit_conditional_ib(struct fd_batch *batch, const struct fd_tile *tile,
- struct fd_ringbuffer *target)
+ struct fd_ringbuffer *target)
{
- struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_ringbuffer *ring = batch->gmem;
- if (target->cur == target->start)
- return;
+ if (target->cur == target->start)
+ return;
- emit_marker6(ring, 6);
+ emit_marker6(ring, 6);
- unsigned count = fd_ringbuffer_cmd_count(target);
+ unsigned count = fd_ringbuffer_cmd_count(target);
- BEGIN_RING(ring, 5 + 4 * count); /* ensure conditional doesn't get split */
+ BEGIN_RING(ring, 5 + 4 * count); /* ensure conditional doesn't get split */
- OUT_PKT7(ring, CP_REG_TEST, 1);
- OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |
- A6XX_CP_REG_TEST_0_BIT(tile->n) |
- A6XX_CP_REG_TEST_0_WAIT_FOR_ME);
+ OUT_PKT7(ring, CP_REG_TEST, 1);
+ OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |
+ A6XX_CP_REG_TEST_0_BIT(tile->n) |
+ A6XX_CP_REG_TEST_0_WAIT_FOR_ME);
- OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
- OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
- OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(4 * count));
+ OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
+ OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
+ OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(4 * count));
- for (unsigned i = 0; i < count; i++) {
- uint32_t dwords;
- OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
- dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
- assert(dwords > 0);
- OUT_RING(ring, dwords);
- }
+ for (unsigned i = 0; i < count; i++) {
+ uint32_t dwords;
+ OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
+ dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
+ assert(dwords > 0);
+ OUT_RING(ring, dwords);
+ }
- emit_marker6(ring, 6);
+ emit_marker6(ring, 6);
}
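/* Sizing note (editorial): the conditional span reserved above is
 *   CP_REG_TEST        -> 2 dwords (pkt7 header + 1 payload)
 *   CP_COND_REG_EXEC   -> 3 dwords (pkt7 header + 2 payload)
 *   CP_INDIRECT_BUFFER -> 4 dwords each (header + 64b address + size)
 * which is exactly the 5 + 4 * count passed to BEGIN_RING(); reserving
 * it up front keeps CP_COND_REG_EXEC and the IBs it may skip from
 * being split across a ringbuffer wrap.
 */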
static void
-set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2)
+set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2,
+ uint32_t y2)
{
- OUT_REG(ring,
- A6XX_GRAS_SC_WINDOW_SCISSOR_TL(.x = x1, .y = y1),
- A6XX_GRAS_SC_WINDOW_SCISSOR_BR(.x = x2, .y = y2));
+ OUT_REG(ring, A6XX_GRAS_SC_WINDOW_SCISSOR_TL(.x = x1, .y = y1),
+ A6XX_GRAS_SC_WINDOW_SCISSOR_BR(.x = x2, .y = y2));
- OUT_REG(ring,
- A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = x1, .y = y1),
- A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = x2, .y = y2));
+ OUT_REG(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = x1, .y = y1),
+ A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = x2, .y = y2));
}
static void
set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag)
{
- OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
- OUT_REG(ring, A6XX_RB_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
- /* no flag for RB_BIN_CONTROL2... */
- OUT_REG(ring, A6XX_RB_BIN_CONTROL2(.binw = w, .binh = h));
+ OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
+ OUT_REG(ring, A6XX_RB_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
+ /* no flag for RB_BIN_CONTROL2... */
+ OUT_REG(ring, A6XX_RB_BIN_CONTROL2(.binw = w, .binh = h));
}
static void
-emit_binning_pass(struct fd_batch *batch)
- assert_dt
+emit_binning_pass(struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->gmem;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd_screen *screen = batch->ctx->screen;
+ struct fd_ringbuffer *ring = batch->gmem;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_screen *screen = batch->ctx->screen;
- debug_assert(!batch->tessellation);
+ debug_assert(!batch->tessellation);
- set_scissor(ring, 0, 0, gmem->width - 1, gmem->height - 1);
+ set_scissor(ring, 0, 0, gmem->width - 1, gmem->height - 1);
- emit_marker6(ring, 7);
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
- emit_marker6(ring, 7);
+ emit_marker6(ring, 7);
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
+ emit_marker6(ring, 7);
- OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
- OUT_RING(ring, 0x1);
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x1);
- OUT_PKT7(ring, CP_SET_MODE, 1);
- OUT_RING(ring, 0x1);
+ OUT_PKT7(ring, CP_SET_MODE, 1);
+ OUT_RING(ring, 0x1);
- OUT_WFI5(ring);
+ OUT_WFI5(ring);
- OUT_REG(ring, A6XX_VFD_MODE_CNTL(.binning_pass = true));
+ OUT_REG(ring, A6XX_VFD_MODE_CNTL(.binning_pass = true));
- update_vsc_pipe(batch);
+ update_vsc_pipe(batch);
- OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);
- OUT_RING(ring, screen->info.a6xx.magic.PC_UNKNOWN_9805);
+ OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);
+ OUT_RING(ring, screen->info.a6xx.magic.PC_UNKNOWN_9805);
- OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);
- OUT_RING(ring, screen->info.a6xx.magic.SP_UNKNOWN_A0F8);
+ OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);
+ OUT_RING(ring, screen->info.a6xx.magic.SP_UNKNOWN_A0F8);
- OUT_PKT7(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, UNK_2C);
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, UNK_2C);
- OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
- OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) |
- A6XX_RB_WINDOW_OFFSET_Y(0));
+ OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) | A6XX_RB_WINDOW_OFFSET_Y(0));
- OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
- OUT_RING(ring, A6XX_SP_TP_WINDOW_OFFSET_X(0) |
- A6XX_SP_TP_WINDOW_OFFSET_Y(0));
+ OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
+ OUT_RING(ring,
+ A6XX_SP_TP_WINDOW_OFFSET_X(0) | A6XX_SP_TP_WINDOW_OFFSET_Y(0));
- /* emit IB to binning drawcmds: */
- trace_start_binning_ib(&batch->trace);
- fd6_emit_ib(ring, batch->draw);
- trace_end_binning_ib(&batch->trace);
+ /* emit IB to binning drawcmds: */
+ trace_start_binning_ib(&batch->trace);
+ fd6_emit_ib(ring, batch->draw);
+ trace_end_binning_ib(&batch->trace);
- fd_reset_wfi(batch);
+ fd_reset_wfi(batch);
- OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
- OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
- CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
- CP_SET_DRAW_STATE__0_GROUP_ID(0));
- OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
- OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
+ OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+ OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
+ CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+ CP_SET_DRAW_STATE__0_GROUP_ID(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
- OUT_PKT7(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, UNK_2D);
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, UNK_2D);
- fd6_cache_inv(batch, ring);
- fd6_cache_flush(batch, ring);
- fd_wfi(batch, ring);
+ fd6_cache_inv(batch, ring);
+ fd6_cache_flush(batch, ring);
+ fd_wfi(batch, ring);
- OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+ OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
- trace_start_vsc_overflow_test(&batch->trace);
- emit_vsc_overflow_test(batch);
- trace_end_vsc_overflow_test(&batch->trace);
+ trace_start_vsc_overflow_test(&batch->trace);
+ emit_vsc_overflow_test(batch);
+ trace_end_vsc_overflow_test(&batch->trace);
- OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x0);
- OUT_PKT7(ring, CP_SET_MODE, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT7(ring, CP_SET_MODE, 1);
+ OUT_RING(ring, 0x0);
- OUT_WFI5(ring);
+ OUT_WFI5(ring);
- OUT_REG(ring,
- A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_gmem,
- .gmem = true,
- .unk2 = screen->info.a6xx.ccu_cntl_gmem_unk2));
+ OUT_REG(ring,
+ A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_gmem,
+ .gmem = true,
+ .unk2 = screen->info.a6xx.ccu_cntl_gmem_unk2));
}
static void
emit_msaa(struct fd_ringbuffer *ring, unsigned nr)
{
- enum a3xx_msaa_samples samples = fd_msaa_samples(nr);
-
- OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
- OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
- OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
- COND(samples == MSAA_ONE, A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));
-
- OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
- OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
- OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
- COND(samples == MSAA_ONE, A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));
-
- OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
- OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
- OUT_RING(ring, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
- COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));
-
- OUT_PKT4(ring, REG_A6XX_RB_MSAA_CNTL, 1);
- OUT_RING(ring, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
+ enum a3xx_msaa_samples samples = fd_msaa_samples(nr);
+
+ OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
+ OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
+ OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
+ COND(samples == MSAA_ONE,
+ A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
+ OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
+ OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
+ COND(samples == MSAA_ONE,
+ A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));
+
+ OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
+ OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
+ OUT_RING(ring,
+ A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
+ COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));
+
+ OUT_PKT4(ring, REG_A6XX_RB_MSAA_CNTL, 1);
+ OUT_RING(ring, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
}
static void prepare_tile_setup_ib(struct fd_batch *batch);
/* before first tile */
static void
-fd6_emit_tile_init(struct fd_batch *batch)
- assert_dt
+fd6_emit_tile_init(struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd_screen *screen = batch->ctx->screen;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd_screen *screen = batch->ctx->screen;
- fd6_emit_restore(batch, ring);
+ fd6_emit_restore(batch, ring);
- fd6_emit_lrz_flush(ring);
+ fd6_emit_lrz_flush(ring);
- if (batch->prologue) {
- trace_start_prologue(&batch->trace);
- fd6_emit_ib(ring, batch->prologue);
- trace_end_prologue(&batch->trace);
- }
+ if (batch->prologue) {
+ trace_start_prologue(&batch->trace);
+ fd6_emit_ib(ring, batch->prologue);
+ trace_end_prologue(&batch->trace);
+ }
- fd6_cache_inv(batch, ring);
+ fd6_cache_inv(batch, ring);
- prepare_tile_setup_ib(batch);
- prepare_tile_fini_ib(batch);
+ prepare_tile_setup_ib(batch);
+ prepare_tile_fini_ib(batch);
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x0);
- /* blob controls "local" in IB2, but I think that is not required */
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
- OUT_RING(ring, 0x1);
+ /* blob controls "local" in IB2, but I think that is not required */
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
+ OUT_RING(ring, 0x1);
- fd_wfi(batch, ring);
- OUT_REG(ring,
- A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_gmem,
- .gmem = true,
- .unk2 = screen->info.a6xx.ccu_cntl_gmem_unk2));
+ fd_wfi(batch, ring);
+ OUT_REG(ring,
+ A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_gmem,
+ .gmem = true,
+ .unk2 = screen->info.a6xx.ccu_cntl_gmem_unk2));
- emit_zs(ring, pfb->zsbuf, batch->gmem_state);
- emit_mrt(ring, pfb, batch->gmem_state);
- emit_msaa(ring, pfb->samples);
- patch_fb_read(batch);
+ emit_zs(ring, pfb->zsbuf, batch->gmem_state);
+ emit_mrt(ring, pfb, batch->gmem_state);
+ emit_msaa(ring, pfb->samples);
+ patch_fb_read(batch);
- if (use_hw_binning(batch)) {
- /* enable stream-out during binning pass: */
- OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));
+ if (use_hw_binning(batch)) {
+ /* enable stream-out during binning pass: */
+ OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));
- set_bin_size(ring, gmem->bin_w, gmem->bin_h,
- A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);
- update_render_cntl(batch, pfb, true);
- emit_binning_pass(batch);
+ set_bin_size(ring, gmem->bin_w, gmem->bin_h,
+ A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);
+ update_render_cntl(batch, pfb, true);
+ emit_binning_pass(batch);
- /* and disable stream-out for draw pass: */
- OUT_REG(ring, A6XX_VPC_SO_DISABLE(true));
+ /* and disable stream-out for draw pass: */
+ OUT_REG(ring, A6XX_VPC_SO_DISABLE(true));
- /*
- * NOTE: even if we detect VSC overflow and disable use of
- * visibility stream in draw pass, it is still safe to execute
- * the reset of these cmds:
- */
+ /*
+ * NOTE: even if we detect VSC overflow and disable use of
+ * visibility stream in draw pass, it is still safe to execute
+ * the rest of these cmds:
+ */
-// NOTE a618 not setting .USE_VIZ .. from a quick check on a630, it
-// does not appear that this bit changes much (ie. it isn't actually
-// .USE_VIZ like previous gens)
- set_bin_size(ring, gmem->bin_w, gmem->bin_h,
- A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);
+ // NOTE a618 not setting .USE_VIZ .. from a quick check on a630, it
+ // does not appear that this bit changes much (ie. it isn't actually
+ // .USE_VIZ like previous gens)
+ set_bin_size(ring, gmem->bin_w, gmem->bin_h,
+ A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);
- OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
+ OUT_RING(ring, 0x0);
- OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);
- OUT_RING(ring, screen->info.a6xx.magic.PC_UNKNOWN_9805);
+ OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);
+ OUT_RING(ring, screen->info.a6xx.magic.PC_UNKNOWN_9805);
- OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);
- OUT_RING(ring, screen->info.a6xx.magic.SP_UNKNOWN_A0F8);
+ OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);
+ OUT_RING(ring, screen->info.a6xx.magic.SP_UNKNOWN_A0F8);
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
- OUT_RING(ring, 0x1);
- } else {
- /* no binning pass, so enable stream-out for draw pass:: */
- OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x1);
+ } else {
+ /* no binning pass, so enable stream-out for draw pass: */
+ OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));
- set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
- }
+ set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
+ }
- update_render_cntl(batch, pfb, false);
+ update_render_cntl(batch, pfb, false);
- emit_common_init(batch);
+ emit_common_init(batch);
}
static void
set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
{
- OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
- OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) |
- A6XX_RB_WINDOW_OFFSET_Y(y1));
+ OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));
- OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);
- OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) |
- A6XX_RB_WINDOW_OFFSET2_Y(y1));
+ OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);
+ OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));
- OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1);
- OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) |
- A6XX_SP_WINDOW_OFFSET_Y(y1));
+ OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));
- OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
- OUT_RING(ring, A6XX_SP_TP_WINDOW_OFFSET_X(x1) |
- A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
+ OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
+ OUT_RING(ring,
+ A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
}
/* before mem2gmem */
static void
fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
- struct fd_context *ctx = batch->ctx;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct fd6_context *fd6_ctx = fd6_context(ctx);
- struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_context *ctx = batch->ctx;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct fd_ringbuffer *ring = batch->gmem;
- emit_marker6(ring, 7);
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
- emit_marker6(ring, 7);
+ emit_marker6(ring, 7);
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
+ emit_marker6(ring, 7);
- uint32_t x1 = tile->xoff;
- uint32_t y1 = tile->yoff;
- uint32_t x2 = tile->xoff + tile->bin_w - 1;
- uint32_t y2 = tile->yoff + tile->bin_h - 1;
+ uint32_t x1 = tile->xoff;
+ uint32_t y1 = tile->yoff;
+ uint32_t x2 = tile->xoff + tile->bin_w - 1;
+ uint32_t y2 = tile->yoff + tile->bin_h - 1;
- set_scissor(ring, x1, y1, x2, y2);
+ set_scissor(ring, x1, y1, x2, y2);
- if (use_hw_binning(batch)) {
- const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
+ if (use_hw_binning(batch)) {
+ const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
- OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+ OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
- OUT_PKT7(ring, CP_SET_MODE, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT7(ring, CP_SET_MODE, 1);
+ OUT_RING(ring, 0x0);
- OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);
- OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
- CP_SET_BIN_DATA5_0_VSC_N(tile->n));
- OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */
- (tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
- OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */
- (tile->p * 4) + (32 * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
- OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,
- (tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);
+ OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);
+ OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
+ CP_SET_BIN_DATA5_0_VSC_N(tile->n));
+ OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */
+ (tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
+ OUT_RELOC(ring,
+ fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */
+ (tile->p * 4) + (32 * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
+ OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,
+ (tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);
- OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x0);
- set_window_offset(ring, x1, y1);
+ set_window_offset(ring, x1, y1);
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
- OUT_PKT7(ring, CP_SET_MODE, 1);
- OUT_RING(ring, 0x0);
- } else {
- set_window_offset(ring, x1, y1);
+ OUT_PKT7(ring, CP_SET_MODE, 1);
+ OUT_RING(ring, 0x0);
+ } else {
+ set_window_offset(ring, x1, y1);
- OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
- OUT_RING(ring, 0x1);
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x1);
- OUT_PKT7(ring, CP_SET_MODE, 1);
- OUT_RING(ring, 0x0);
- }
+ OUT_PKT7(ring, CP_SET_MODE, 1);
+ OUT_RING(ring, 0x0);
+ }
}
static void
set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- struct pipe_scissor_state blit_scissor = batch->max_scissor;
-
- blit_scissor.minx = ROUND_DOWN_TO(blit_scissor.minx, 16);
- blit_scissor.miny = ROUND_DOWN_TO(blit_scissor.miny, 4);
- blit_scissor.maxx = ALIGN(blit_scissor.maxx, 16);
- blit_scissor.maxy = ALIGN(blit_scissor.maxy, 4);
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
- OUT_RING(ring,
- A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |
- A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny));
- OUT_RING(ring,
- A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx - 1) |
- A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy - 1));
+ struct pipe_scissor_state blit_scissor = batch->max_scissor;
+
+ blit_scissor.minx = ROUND_DOWN_TO(blit_scissor.minx, 16);
+ blit_scissor.miny = ROUND_DOWN_TO(blit_scissor.miny, 4);
+ blit_scissor.maxx = ALIGN(blit_scissor.maxx, 16);
+ blit_scissor.maxy = ALIGN(blit_scissor.maxy, 4);
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
+ OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |
+ A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny));
+ OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx - 1) |
+ A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy - 1));
}
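/* Worked example of the rounding above (values invented): a dirty
 * region of (13,5)..(115,97) becomes
 *   minx = ROUND_DOWN_TO(13, 16) = 0    miny = ROUND_DOWN_TO(5, 4) = 4
 *   maxx = ALIGN(115, 16) = 128         maxy = ALIGN(97, 4) = 100
 * so the blit scissor always spans whole 16x4 blocks, presumably the
 * minimum granularity the BLIT event can resolve at.
 */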
static void
-emit_blit(struct fd_batch *batch,
- struct fd_ringbuffer *ring,
- uint32_t base,
- struct pipe_surface *psurf,
- bool stencil)
+emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base,
+ struct pipe_surface *psurf, bool stencil)
{
- struct fd_resource *rsc = fd_resource(psurf->texture);
- enum pipe_format pfmt = psurf->format;
- uint32_t offset;
- bool ubwc_enabled;
-
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
-
- /* separate stencil case: */
- if (stencil) {
- rsc = rsc->stencil;
- pfmt = rsc->b.b.format;
- }
-
- offset = fd_resource_offset(rsc, psurf->u.tex.level,
- psurf->u.tex.first_layer);
- ubwc_enabled = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level);
-
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
-
- enum a6xx_format format = fd6_pipe2color(pfmt);
- uint32_t stride = fd_resource_pitch(rsc, psurf->u.tex.level);
- uint32_t size = fd_resource_slice(rsc, psurf->u.tex.level)->size0;
- enum a3xx_color_swap swap = fd6_resource_swap(rsc, pfmt);
- enum a3xx_msaa_samples samples =
- fd_msaa_samples(rsc->b.b.nr_samples);
- uint32_t tile_mode = fd_resource_tile_mode(&rsc->b.b, psurf->u.tex.level);
-
- OUT_REG(ring,
- A6XX_RB_BLIT_DST_INFO(.tile_mode = tile_mode, .samples = samples,
- .color_format = format, .color_swap = swap, .flags = ubwc_enabled),
- A6XX_RB_BLIT_DST(.bo = rsc->bo, .bo_offset = offset),
- A6XX_RB_BLIT_DST_PITCH(.a6xx_rb_blit_dst_pitch = stride),
- A6XX_RB_BLIT_DST_ARRAY_PITCH(.a6xx_rb_blit_dst_array_pitch = size));
-
- OUT_REG(ring, A6XX_RB_BLIT_BASE_GMEM(.dword = base));
-
- if (ubwc_enabled) {
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_FLAG_DST, 3);
- fd6_emit_flag_reference(ring, rsc,
- psurf->u.tex.level, psurf->u.tex.first_layer);
- }
-
- fd6_emit_blit(batch, ring);
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+ enum pipe_format pfmt = psurf->format;
+ uint32_t offset;
+ bool ubwc_enabled;
+
+ debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+ /* separate stencil case: */
+ if (stencil) {
+ rsc = rsc->stencil;
+ pfmt = rsc->b.b.format;
+ }
+
+ offset =
+ fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
+ ubwc_enabled = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level);
+
+ debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+ enum a6xx_format format = fd6_pipe2color(pfmt);
+ uint32_t stride = fd_resource_pitch(rsc, psurf->u.tex.level);
+ uint32_t size = fd_resource_slice(rsc, psurf->u.tex.level)->size0;
+ enum a3xx_color_swap swap = fd6_resource_swap(rsc, pfmt);
+ enum a3xx_msaa_samples samples = fd_msaa_samples(rsc->b.b.nr_samples);
+ uint32_t tile_mode = fd_resource_tile_mode(&rsc->b.b, psurf->u.tex.level);
+
+ OUT_REG(ring,
+ A6XX_RB_BLIT_DST_INFO(.tile_mode = tile_mode, .samples = samples,
+ .color_format = format, .color_swap = swap,
+ .flags = ubwc_enabled),
+ A6XX_RB_BLIT_DST(.bo = rsc->bo, .bo_offset = offset),
+ A6XX_RB_BLIT_DST_PITCH(.a6xx_rb_blit_dst_pitch = stride),
+ A6XX_RB_BLIT_DST_ARRAY_PITCH(.a6xx_rb_blit_dst_array_pitch = size));
+
+ OUT_REG(ring, A6XX_RB_BLIT_BASE_GMEM(.dword = base));
+
+ if (ubwc_enabled) {
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_FLAG_DST, 3);
+ fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
+ psurf->u.tex.first_layer);
+ }
+
+ fd6_emit_blit(batch, ring);
}
static void
-emit_restore_blit(struct fd_batch *batch,
- struct fd_ringbuffer *ring,
- uint32_t base,
- struct pipe_surface *psurf,
- unsigned buffer)
+emit_restore_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
+ uint32_t base, struct pipe_surface *psurf, unsigned buffer)
{
- bool stencil = (buffer == FD_BUFFER_STENCIL);
+ bool stencil = (buffer == FD_BUFFER_STENCIL);
- OUT_REG(ring, A6XX_RB_BLIT_INFO(
- .gmem = true, .unk0 = true,
- .depth = (buffer == FD_BUFFER_DEPTH),
- .sample_0 = util_format_is_pure_integer(psurf->format)));
+ OUT_REG(ring, A6XX_RB_BLIT_INFO(.gmem = true, .unk0 = true,
+ .depth = (buffer == FD_BUFFER_DEPTH),
+ .sample_0 = util_format_is_pure_integer(
+ psurf->format)));
- emit_blit(batch, ring, base, psurf, stencil);
+ emit_blit(batch, ring, base, psurf, stencil);
}
static void
emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);
-
- uint32_t buffers = batch->fast_cleared;
-
- if (buffers & PIPE_CLEAR_COLOR) {
-
- for (int i = 0; i < pfb->nr_cbufs; i++) {
- union pipe_color_union *color = &batch->clear_color[i];
- union util_color uc = {0};
-
- if (!pfb->cbufs[i])
- continue;
-
- if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
- continue;
-
- enum pipe_format pfmt = pfb->cbufs[i]->format;
-
- // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
- union pipe_color_union swapped;
- switch (fd6_pipe2swap(pfmt)) {
- case WZYX:
- swapped.ui[0] = color->ui[0];
- swapped.ui[1] = color->ui[1];
- swapped.ui[2] = color->ui[2];
- swapped.ui[3] = color->ui[3];
- break;
- case WXYZ:
- swapped.ui[2] = color->ui[0];
- swapped.ui[1] = color->ui[1];
- swapped.ui[0] = color->ui[2];
- swapped.ui[3] = color->ui[3];
- break;
- case ZYXW:
- swapped.ui[3] = color->ui[0];
- swapped.ui[0] = color->ui[1];
- swapped.ui[1] = color->ui[2];
- swapped.ui[2] = color->ui[3];
- break;
- case XYZW:
- swapped.ui[3] = color->ui[0];
- swapped.ui[2] = color->ui[1];
- swapped.ui[1] = color->ui[2];
- swapped.ui[0] = color->ui[3];
- break;
- }
-
- util_pack_color_union(pfmt, &uc, &swapped);
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
- OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
- A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
- A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
- OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
- A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
- OUT_RING(ring, gmem->cbuf_base[i]);
-
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
- OUT_RING(ring, 0);
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
- OUT_RING(ring, uc.ui[0]);
- OUT_RING(ring, uc.ui[1]);
- OUT_RING(ring, uc.ui[2]);
- OUT_RING(ring, uc.ui[3]);
-
- fd6_emit_blit(batch, ring);
- }
- }
-
- const bool has_depth = pfb->zsbuf;
- const bool has_separate_stencil =
- has_depth && fd_resource(pfb->zsbuf->texture)->stencil;
-
- /* First clear depth or combined depth/stencil. */
- if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
- (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
- enum pipe_format pfmt = pfb->zsbuf->format;
- uint32_t clear_value;
- uint32_t mask = 0;
-
- if (has_separate_stencil) {
- pfmt = util_format_get_depth_only(pfb->zsbuf->format);
- clear_value = util_pack_z(pfmt, batch->clear_depth);
- } else {
- pfmt = pfb->zsbuf->format;
- clear_value = util_pack_z_stencil(pfmt, batch->clear_depth,
- batch->clear_stencil);
- }
-
- if (buffers & PIPE_CLEAR_DEPTH)
- mask |= 0x1;
-
- if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
- mask |= 0x2;
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
- OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
- A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
- A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
- OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
- // XXX UNK0 for separate stencil ??
- A6XX_RB_BLIT_INFO_DEPTH |
- A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
- OUT_RING(ring, gmem->zsbuf_base[0]);
-
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
- OUT_RING(ring, 0);
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
- OUT_RING(ring, clear_value);
-
- fd6_emit_blit(batch, ring);
- }
-
- /* Then clear the separate stencil buffer in case of 32 bit depth
- * formats with separate stencil. */
- if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
- OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
- A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
- A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(FMT6_8_UINT));
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
- OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
- //A6XX_RB_BLIT_INFO_UNK0 |
- A6XX_RB_BLIT_INFO_DEPTH |
- A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
- OUT_RING(ring, gmem->zsbuf_base[1]);
-
- OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
- OUT_RING(ring, 0);
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
- OUT_RING(ring, batch->clear_stencil & 0xff);
-
- fd6_emit_blit(batch, ring);
- }
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);
+
+ uint32_t buffers = batch->fast_cleared;
+
+ if (buffers & PIPE_CLEAR_COLOR) {
+
+ for (int i = 0; i < pfb->nr_cbufs; i++) {
+ union pipe_color_union *color = &batch->clear_color[i];
+ union util_color uc = {0};
+
+ if (!pfb->cbufs[i])
+ continue;
+
+ if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+
+ enum pipe_format pfmt = pfb->cbufs[i]->format;
+
+ // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
+ union pipe_color_union swapped;
+ switch (fd6_pipe2swap(pfmt)) {
+ case WZYX:
+ swapped.ui[0] = color->ui[0];
+ swapped.ui[1] = color->ui[1];
+ swapped.ui[2] = color->ui[2];
+ swapped.ui[3] = color->ui[3];
+ break;
+ case WXYZ:
+ swapped.ui[2] = color->ui[0];
+ swapped.ui[1] = color->ui[1];
+ swapped.ui[0] = color->ui[2];
+ swapped.ui[3] = color->ui[3];
+ break;
+ case ZYXW:
+ swapped.ui[3] = color->ui[0];
+ swapped.ui[0] = color->ui[1];
+ swapped.ui[1] = color->ui[2];
+ swapped.ui[2] = color->ui[3];
+ break;
+ case XYZW:
+ swapped.ui[3] = color->ui[0];
+ swapped.ui[2] = color->ui[1];
+ swapped.ui[1] = color->ui[2];
+ swapped.ui[0] = color->ui[3];
+ break;
+ }
+
+ util_pack_color_union(pfmt, &uc, &swapped);
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
+ OUT_RING(ring,
+ A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+ A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
+ A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
+ OUT_RING(ring,
+ A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
+ OUT_RING(ring, gmem->cbuf_base[i]);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
+ OUT_RING(ring, 0);
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
+ OUT_RING(ring, uc.ui[0]);
+ OUT_RING(ring, uc.ui[1]);
+ OUT_RING(ring, uc.ui[2]);
+ OUT_RING(ring, uc.ui[3]);
+
+ fd6_emit_blit(batch, ring);
+ }
+ }
+
+ const bool has_depth = pfb->zsbuf;
+ const bool has_separate_stencil =
+ has_depth && fd_resource(pfb->zsbuf->texture)->stencil;
+
+ /* First clear depth or combined depth/stencil. */
+ if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
+ (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
+ enum pipe_format pfmt = pfb->zsbuf->format;
+ uint32_t clear_value;
+ uint32_t mask = 0;
+
+ if (has_separate_stencil) {
+ pfmt = util_format_get_depth_only(pfb->zsbuf->format);
+ clear_value = util_pack_z(pfmt, batch->clear_depth);
+ } else {
+ pfmt = pfb->zsbuf->format;
+ clear_value =
+ util_pack_z_stencil(pfmt, batch->clear_depth, batch->clear_stencil);
+ }
+
+ if (buffers & PIPE_CLEAR_DEPTH)
+ mask |= 0x1;
+
+ if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
+ mask |= 0x2;
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
+ OUT_RING(ring,
+ A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+ A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
+ A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
+ OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
+ // XXX UNK0 for separate stencil ??
+ A6XX_RB_BLIT_INFO_DEPTH |
+ A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
+ OUT_RING(ring, gmem->zsbuf_base[0]);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
+ OUT_RING(ring, 0);
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
+ OUT_RING(ring, clear_value);
+
+ fd6_emit_blit(batch, ring);
+ }
+
+ /* Then clear the separate stencil buffer in case of 32 bit depth
+ * formats with separate stencil. */
+ if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
+ OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+ A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
+ A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(FMT6_8_UINT));
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
+ OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
+ // A6XX_RB_BLIT_INFO_UNK0 |
+ A6XX_RB_BLIT_INFO_DEPTH |
+ A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
+ OUT_RING(ring, gmem->zsbuf_base[1]);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
+ OUT_RING(ring, 0);
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
+ OUT_RING(ring, batch->clear_stencil & 0xff);
+
+ fd6_emit_blit(batch, ring);
+ }
}
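/* Sketch of one case of the swizzle above (illustration only): for a
 * ZYXW-swapped format, a clear color supplied as RGBA (r, g, b, a)
 * ends up as
 *   swapped.ui[] = { g, b, a, r }
 * before util_pack_color_union() packs it into the value written to
 * RB_BLIT_CLEAR_COLOR_DW0..3.
 */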
static void
emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
-
- if (batch->restore & FD_BUFFER_COLOR) {
- unsigned i;
- for (i = 0; i < pfb->nr_cbufs; i++) {
- if (!pfb->cbufs[i])
- continue;
- if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
- continue;
- emit_restore_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
- FD_BUFFER_COLOR);
- }
- }
-
- if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
-
- if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) {
- emit_restore_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
- FD_BUFFER_DEPTH);
- }
- if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) {
- emit_restore_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
- FD_BUFFER_STENCIL);
- }
- }
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
+ if (batch->restore & FD_BUFFER_COLOR) {
+ unsigned i;
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ if (!pfb->cbufs[i])
+ continue;
+ if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+ emit_restore_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
+ FD_BUFFER_COLOR);
+ }
+ }
+
+ if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+
+ if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) {
+ emit_restore_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
+ FD_BUFFER_DEPTH);
+ }
+ if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) {
+ emit_restore_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
+ FD_BUFFER_STENCIL);
+ }
+ }
}
static void
prepare_tile_setup_ib(struct fd_batch *batch)
{
- if (!(batch->restore || batch->fast_cleared))
- return;
+ if (!(batch->restore || batch->fast_cleared))
+ return;
- batch->tile_setup = fd_submit_new_ringbuffer(batch->submit, 0x1000,
- FD_RINGBUFFER_STREAMING);
+ batch->tile_setup =
+ fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
- set_blit_scissor(batch, batch->tile_setup);
+ set_blit_scissor(batch, batch->tile_setup);
- emit_restore_blits(batch, batch->tile_setup);
- emit_clears(batch, batch->tile_setup);
+ emit_restore_blits(batch, batch->tile_setup);
+ emit_clears(batch, batch->tile_setup);
}
static void
fd6_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
- if (!batch->tile_setup)
- return;
-
- trace_start_clear_restore(&batch->trace, batch->fast_cleared);
- if (batch->fast_cleared || !use_hw_binning(batch)) {
- fd6_emit_ib(batch->gmem, batch->tile_setup);
- } else {
- emit_conditional_ib(batch, tile, batch->tile_setup);
- }
- trace_end_clear_restore(&batch->trace);
+ if (!batch->tile_setup)
+ return;
+
+ trace_start_clear_restore(&batch->trace, batch->fast_cleared);
+ if (batch->fast_cleared || !use_hw_binning(batch)) {
+ fd6_emit_ib(batch->gmem, batch->tile_setup);
+ } else {
+ emit_conditional_ib(batch, tile, batch->tile_setup);
+ }
+ trace_end_clear_restore(&batch->trace);
}
static bool
blit_can_resolve(enum pipe_format format)
{
- const struct util_format_description *desc = util_format_description(format);
-
- /* blit event can only do resolve for simple cases:
- * averaging samples as unsigned integers or choosing only one sample
- */
- if (util_format_is_snorm(format) || util_format_is_srgb(format))
- return false;
-
- /* can't do formats with larger channel sizes
- * note: this includes all float formats
- * note2: single channel integer formats seem OK
- */
- if (desc->channel[0].size > 10)
- return false;
-
- switch (format) {
- /* for unknown reasons blit event can't msaa resolve these formats when tiled
- * likely related to these formats having different layout from other cpp=2 formats
- */
- case PIPE_FORMAT_R8G8_UNORM:
- case PIPE_FORMAT_R8G8_UINT:
- case PIPE_FORMAT_R8G8_SINT:
- /* TODO: this one should be able to work? */
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- return false;
- default:
- break;
- }
-
- return true;
+ const struct util_format_description *desc = util_format_description(format);
+
+ /* blit event can only do resolve for simple cases:
+ * averaging samples as unsigned integers or choosing only one sample
+ */
+ if (util_format_is_snorm(format) || util_format_is_srgb(format))
+ return false;
+
+ /* can't do formats with larger channel sizes
+ * note: this includes all float formats
+ * note2: single channel integer formats seem OK
+ */
+ if (desc->channel[0].size > 10)
+ return false;
+
+ switch (format) {
+ /* for unknown reasons blit event can't msaa resolve these formats when tiled,
+ * likely related to these formats having different layout from other cpp=2
+ * formats
+ */
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8G8_UINT:
+ case PIPE_FORMAT_R8G8_SINT:
+ /* TODO: this one should be able to work? */
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ return false;
+ default:
+ break;
+ }
+
+ return true;
}
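/* Example of the channel-size cut-off above (illustration): a format
 * like PIPE_FORMAT_R16_FLOAT has desc->channel[0].size == 16 > 10, so
 * MSAA resolves of it take the per-tile CP_BLIT (r2d) fallback in
 * emit_resolve_blit() below instead of the BLIT event.
 */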
static bool
needs_resolve(struct pipe_surface *psurf)
{
- return psurf->nr_samples && (psurf->nr_samples != psurf->texture->nr_samples);
+ return psurf->nr_samples &&
+ (psurf->nr_samples != psurf->texture->nr_samples);
}
static void
-emit_resolve_blit(struct fd_batch *batch,
- struct fd_ringbuffer *ring,
- uint32_t base,
- struct pipe_surface *psurf,
- unsigned buffer)
- assert_dt
+emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
+ uint32_t base, struct pipe_surface *psurf,
+ unsigned buffer) assert_dt
{
- uint32_t info = 0;
- bool stencil = false;
-
- if (!fd_resource(psurf->texture)->valid)
- return;
-
- /* if we need to resolve, but cannot with BLIT event, we instead need
- * to generate per-tile CP_BLIT (r2d) commands:
- *
- * The separate-stencil is a special case, we might need to use CP_BLIT
- * for depth, but we can still resolve stencil with a BLIT event
- */
- if (needs_resolve(psurf) && !blit_can_resolve(psurf->format) &&
- (buffer != FD_BUFFER_STENCIL)) {
- fd6_resolve_tile(batch, ring, base, psurf);
- return;
- }
-
- switch (buffer) {
- case FD_BUFFER_COLOR:
- break;
- case FD_BUFFER_STENCIL:
- info |= A6XX_RB_BLIT_INFO_UNK0;
- stencil = true;
- break;
- case FD_BUFFER_DEPTH:
- info |= A6XX_RB_BLIT_INFO_DEPTH;
- break;
- }
-
- if (util_format_is_pure_integer(psurf->format) || util_format_is_depth_or_stencil(psurf->format))
- info |= A6XX_RB_BLIT_INFO_SAMPLE_0;
-
- OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
- OUT_RING(ring, info);
-
- emit_blit(batch, ring, base, psurf, stencil);
+ uint32_t info = 0;
+ bool stencil = false;
+
+ if (!fd_resource(psurf->texture)->valid)
+ return;
+
+ /* if we need to resolve, but cannot with BLIT event, we instead need
+ * to generate per-tile CP_BLIT (r2d) commands:
+ *
+ * The separate-stencil is a special case, we might need to use CP_BLIT
+ * for depth, but we can still resolve stencil with a BLIT event
+ */
+ if (needs_resolve(psurf) && !blit_can_resolve(psurf->format) &&
+ (buffer != FD_BUFFER_STENCIL)) {
+ fd6_resolve_tile(batch, ring, base, psurf);
+ return;
+ }
+
+ switch (buffer) {
+ case FD_BUFFER_COLOR:
+ break;
+ case FD_BUFFER_STENCIL:
+ info |= A6XX_RB_BLIT_INFO_UNK0;
+ stencil = true;
+ break;
+ case FD_BUFFER_DEPTH:
+ info |= A6XX_RB_BLIT_INFO_DEPTH;
+ break;
+ }
+
+ if (util_format_is_pure_integer(psurf->format) ||
+ util_format_is_depth_or_stencil(psurf->format))
+ info |= A6XX_RB_BLIT_INFO_SAMPLE_0;
+
+ OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
+ OUT_RING(ring, info);
+
+ emit_blit(batch, ring, base, psurf, stencil);
}
/*
*/
static void
-prepare_tile_fini_ib(struct fd_batch *batch)
- assert_dt
+prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
{
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct fd_ringbuffer *ring;
-
- batch->tile_fini = fd_submit_new_ringbuffer(batch->submit, 0x1000,
- FD_RINGBUFFER_STREAMING);
- ring = batch->tile_fini;
-
- set_blit_scissor(batch, ring);
-
- if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
-
- if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {
- emit_resolve_blit(batch, ring,
- gmem->zsbuf_base[0], pfb->zsbuf,
- FD_BUFFER_DEPTH);
- }
- if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {
- emit_resolve_blit(batch, ring,
- gmem->zsbuf_base[1], pfb->zsbuf,
- FD_BUFFER_STENCIL);
- }
- }
-
- if (batch->resolve & FD_BUFFER_COLOR) {
- unsigned i;
- for (i = 0; i < pfb->nr_cbufs; i++) {
- if (!pfb->cbufs[i])
- continue;
- if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
- continue;
- emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
- FD_BUFFER_COLOR);
- }
- }
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd_ringbuffer *ring;
+
+ batch->tile_fini =
+ fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
+ ring = batch->tile_fini;
+
+ set_blit_scissor(batch, ring);
+
+ if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+
+ if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {
+ emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
+ FD_BUFFER_DEPTH);
+ }
+ if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {
+ emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
+ FD_BUFFER_STENCIL);
+ }
+ }
+
+ if (batch->resolve & FD_BUFFER_COLOR) {
+ unsigned i;
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ if (!pfb->cbufs[i])
+ continue;
+ if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+ emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
+ FD_BUFFER_COLOR);
+ }
+ }
}
static void
fd6_emit_tile(struct fd_batch *batch, const struct fd_tile *tile)
{
- if (!use_hw_binning(batch)) {
- fd6_emit_ib(batch->gmem, batch->draw);
- } else {
- emit_conditional_ib(batch, tile, batch->draw);
- }
-
- if (batch->epilogue)
- fd6_emit_ib(batch->gmem, batch->epilogue);
+ if (!use_hw_binning(batch)) {
+ fd6_emit_ib(batch->gmem, batch->draw);
+ } else {
+ emit_conditional_ib(batch, tile, batch->draw);
+ }
+
+ if (batch->epilogue)
+ fd6_emit_ib(batch->gmem, batch->epilogue);
}
static void
fd6_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
- struct fd_ringbuffer *ring = batch->gmem;
-
- if (use_hw_binning(batch)) {
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
- }
-
- OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
- OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
- CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
- CP_SET_DRAW_STATE__0_GROUP_ID(0));
- OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
- OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
-
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
- OUT_RING(ring, 0x0);
-
- emit_marker6(ring, 7);
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
- emit_marker6(ring, 7);
-
- trace_start_resolve(&batch->trace);
- if (batch->fast_cleared || !use_hw_binning(batch)) {
- fd6_emit_ib(batch->gmem, batch->tile_fini);
- } else {
- emit_conditional_ib(batch, tile, batch->tile_fini);
- }
- trace_end_resolve(&batch->trace);
+ struct fd_ringbuffer *ring = batch->gmem;
+
+ if (use_hw_binning(batch)) {
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
+ }
+
+ OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+ OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
+ CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+ CP_SET_DRAW_STATE__0_GROUP_ID(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
+
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
+ OUT_RING(ring, 0x0);
+
+ emit_marker6(ring, 7);
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
+ emit_marker6(ring, 7);
+
+ trace_start_resolve(&batch->trace);
+ if (batch->fast_cleared || !use_hw_binning(batch)) {
+ fd6_emit_ib(batch->gmem, batch->tile_fini);
+ } else {
+ emit_conditional_ib(batch, tile, batch->tile_fini);
+ }
+ trace_end_resolve(&batch->trace);
}
static void
fd6_emit_tile_fini(struct fd_batch *batch)
{
- struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_ringbuffer *ring = batch->gmem;
- emit_common_fini(batch);
+ emit_common_fini(batch);
- OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
- OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);
+ OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
+ OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);
- fd6_emit_lrz_flush(ring);
+ fd6_emit_lrz_flush(ring);
- fd6_event_write(batch, ring, PC_CCU_RESOLVE_TS, true);
+ fd6_event_write(batch, ring, PC_CCU_RESOLVE_TS, true);
- if (use_hw_binning(batch)) {
- check_vsc_overflow(batch->ctx);
- }
+ if (use_hw_binning(batch)) {
+ check_vsc_overflow(batch->ctx);
+ }
}
static void
-emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring)
- assert_dt
+emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd_context *ctx = batch->ctx;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- uint32_t buffers = batch->fast_cleared;
+ uint32_t buffers = batch->fast_cleared;
- if (!buffers)
- return;
+ if (!buffers)
+ return;
- trace_start_clear_restore(&batch->trace, buffers);
+ trace_start_clear_restore(&batch->trace, buffers);
- if (buffers & PIPE_CLEAR_COLOR) {
- for (int i = 0; i < pfb->nr_cbufs; i++) {
- union pipe_color_union color = batch->clear_color[i];
+ if (buffers & PIPE_CLEAR_COLOR) {
+ for (int i = 0; i < pfb->nr_cbufs; i++) {
+ union pipe_color_union color = batch->clear_color[i];
- if (!pfb->cbufs[i])
- continue;
+ if (!pfb->cbufs[i])
+ continue;
- if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
- continue;
+ if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
- fd6_clear_surface(ctx, ring,
- pfb->cbufs[i], pfb->width, pfb->height, &color);
- }
- }
- if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
- union pipe_color_union value = {};
+ fd6_clear_surface(ctx, ring, pfb->cbufs[i], pfb->width, pfb->height,
+ &color);
+ }
+ }
+ if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
+ union pipe_color_union value = {};
- const bool has_depth = pfb->zsbuf;
- struct pipe_resource *separate_stencil =
- has_depth && fd_resource(pfb->zsbuf->texture)->stencil ?
- &fd_resource(pfb->zsbuf->texture)->stencil->b.b : NULL;
+ const bool has_depth = pfb->zsbuf;
+ struct pipe_resource *separate_stencil =
+ has_depth && fd_resource(pfb->zsbuf->texture)->stencil
+ ? &fd_resource(pfb->zsbuf->texture)->stencil->b.b
+ : NULL;
- if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
- (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
- value.f[0] = batch->clear_depth;
- value.ui[1] = batch->clear_stencil;
- fd6_clear_surface(ctx, ring,
- pfb->zsbuf, pfb->width, pfb->height, &value);
- }
+ if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
+ (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
+ value.f[0] = batch->clear_depth;
+ value.ui[1] = batch->clear_stencil;
+ fd6_clear_surface(ctx, ring, pfb->zsbuf, pfb->width, pfb->height,
+ &value);
+ }
- if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
- value.ui[0] = batch->clear_stencil;
+ if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
+ value.ui[0] = batch->clear_stencil;
- struct pipe_surface stencil_surf = *pfb->zsbuf;
- stencil_surf.format = PIPE_FORMAT_S8_UINT;
- stencil_surf.texture = separate_stencil;
+ struct pipe_surface stencil_surf = *pfb->zsbuf;
+ stencil_surf.format = PIPE_FORMAT_S8_UINT;
+ stencil_surf.texture = separate_stencil;
- fd6_clear_surface(ctx, ring,
- &stencil_surf, pfb->width, pfb->height, &value);
- }
- }
+ fd6_clear_surface(ctx, ring, &stencil_surf, pfb->width, pfb->height,
+ &value);
+ }
+ }
- fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
- trace_end_clear_restore(&batch->trace);
+ trace_end_clear_restore(&batch->trace);
}
static void
setup_tess_buffers(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- struct fd_context *ctx = batch->ctx;
+ struct fd_context *ctx = batch->ctx;
- batch->tessfactor_bo = fd_bo_new(ctx->screen->dev,
- batch->tessfactor_size,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "tessfactor");
+ batch->tessfactor_bo = fd_bo_new(ctx->screen->dev, batch->tessfactor_size,
+ DRM_FREEDRENO_GEM_TYPE_KMEM, "tessfactor");
- batch->tessparam_bo = fd_bo_new(ctx->screen->dev,
- batch->tessparam_size,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "tessparam");
+ batch->tessparam_bo = fd_bo_new(ctx->screen->dev, batch->tessparam_size,
+ DRM_FREEDRENO_GEM_TYPE_KMEM, "tessparam");
- OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2);
- OUT_RELOC(ring, batch->tessfactor_bo, 0, 0, 0);
+ OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2);
+ OUT_RELOC(ring, batch->tessfactor_bo, 0, 0, 0);
- batch->tess_addrs_constobj->cur = batch->tess_addrs_constobj->start;
- OUT_RELOC(batch->tess_addrs_constobj, batch->tessparam_bo, 0, 0, 0);
- OUT_RELOC(batch->tess_addrs_constobj, batch->tessfactor_bo, 0, 0, 0);
+ batch->tess_addrs_constobj->cur = batch->tess_addrs_constobj->start;
+ OUT_RELOC(batch->tess_addrs_constobj, batch->tessparam_bo, 0, 0, 0);
+ OUT_RELOC(batch->tess_addrs_constobj, batch->tessfactor_bo, 0, 0, 0);
}
static void
-fd6_emit_sysmem_prep(struct fd_batch *batch)
- assert_dt
+fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->gmem;
- struct fd_screen *screen = batch->ctx->screen;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_screen *screen = batch->ctx->screen;
- fd6_emit_restore(batch, ring);
- fd6_emit_lrz_flush(ring);
+ fd6_emit_restore(batch, ring);
+ fd6_emit_lrz_flush(ring);
- if (batch->prologue) {
- if (!batch->nondraw) {
- trace_start_prologue(&batch->trace);
- }
- fd6_emit_ib(ring, batch->prologue);
- if (!batch->nondraw) {
- trace_end_prologue(&batch->trace);
- }
- }
+ if (batch->prologue) {
+ if (!batch->nondraw) {
+ trace_start_prologue(&batch->trace);
+ }
+ fd6_emit_ib(ring, batch->prologue);
+ if (!batch->nondraw) {
+ trace_end_prologue(&batch->trace);
+ }
+ }
- /* remaining setup below here does not apply to blit/compute: */
- if (batch->nondraw)
- return;
+ /* remaining setup below here does not apply to blit/compute: */
+ if (batch->nondraw)
+ return;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- if (pfb->width > 0 && pfb->height > 0)
- set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1);
- else
- set_scissor(ring, 0, 0, 0, 0);
+ if (pfb->width > 0 && pfb->height > 0)
+ set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1);
+ else
+ set_scissor(ring, 0, 0, 0, 0);
- set_window_offset(ring, 0, 0);
+ set_window_offset(ring, 0, 0);
- set_bin_size(ring, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */
+ set_bin_size(ring, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */
- emit_sysmem_clears(batch, ring);
+ emit_sysmem_clears(batch, ring);
- emit_marker6(ring, 7);
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
- emit_marker6(ring, 7);
+ emit_marker6(ring, 7);
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
+ emit_marker6(ring, 7);
- if (batch->tessellation)
- setup_tess_buffers(batch, ring);
+ if (batch->tessellation)
+ setup_tess_buffers(batch, ring);
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x0);
- /* blob controls "local" in IB2, but I think that is not required */
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
- OUT_RING(ring, 0x1);
+ /* blob controls "local" in IB2, but I think that is not required */
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
+ OUT_RING(ring, 0x1);
- fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
- fd6_cache_inv(batch, ring);
+ fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
+ fd6_cache_inv(batch, ring);
- fd_wfi(batch, ring);
- OUT_REG(ring, A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_bypass));
+ fd_wfi(batch, ring);
+ OUT_REG(ring,
+ A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_bypass));
- /* enable stream-out, with sysmem there is only one pass: */
- OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));
+ /* enable stream-out, with sysmem there is only one pass: */
+ OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));
- OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
- OUT_RING(ring, 0x1);
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x1);
- emit_zs(ring, pfb->zsbuf, NULL);
- emit_mrt(ring, pfb, NULL);
- emit_msaa(ring, pfb->samples);
+ emit_zs(ring, pfb->zsbuf, NULL);
+ emit_mrt(ring, pfb, NULL);
+ emit_msaa(ring, pfb->samples);
- update_render_cntl(batch, pfb, false);
+ update_render_cntl(batch, pfb, false);
- emit_common_init(batch);
+ emit_common_init(batch);
}
static void
fd6_emit_sysmem_fini(struct fd_batch *batch)
{
- struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_ringbuffer *ring = batch->gmem;
- emit_common_fini(batch);
+ emit_common_fini(batch);
- if (batch->epilogue)
- fd6_emit_ib(batch->gmem, batch->epilogue);
+ if (batch->epilogue)
+ fd6_emit_ib(batch->gmem, batch->epilogue);
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
- OUT_RING(ring, 0x0);
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x0);
- fd6_emit_lrz_flush(ring);
+ fd6_emit_lrz_flush(ring);
- fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
- fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+ fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
}
void
-fd6_gmem_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd6_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
-
- ctx->emit_tile_init = fd6_emit_tile_init;
- ctx->emit_tile_prep = fd6_emit_tile_prep;
- ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;
- ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;
- ctx->emit_tile = fd6_emit_tile;
- ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;
- ctx->emit_tile_fini = fd6_emit_tile_fini;
- ctx->emit_sysmem_prep = fd6_emit_sysmem_prep;
- ctx->emit_sysmem_fini = fd6_emit_sysmem_fini;
+ struct fd_context *ctx = fd_context(pctx);
+
+ ctx->emit_tile_init = fd6_emit_tile_init;
+ ctx->emit_tile_prep = fd6_emit_tile_prep;
+ ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;
+ ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;
+ ctx->emit_tile = fd6_emit_tile;
+ ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;
+ ctx->emit_tile_fini = fd6_emit_tile_fini;
+ ctx->emit_sysmem_prep = fd6_emit_sysmem_prep;
+ ctx->emit_sysmem_fini = fd6_emit_sysmem_fini;
}
#include "freedreno_resource.h"
#include "freedreno_state.h"
-#include "fd6_image.h"
#include "fd6_format.h"
+#include "fd6_image.h"
#include "fd6_resource.h"
#include "fd6_texture.h"
struct fd6_image {
- struct pipe_resource *prsc;
- enum pipe_format pfmt;
- enum a6xx_tex_type type;
- bool srgb;
- uint32_t cpp;
- uint32_t level;
- uint32_t width;
- uint32_t height;
- uint32_t depth;
- uint32_t pitch;
- uint32_t array_pitch;
- struct fd_bo *bo;
- uint32_t ubwc_offset;
- uint32_t offset;
- bool buffer;
+ struct pipe_resource *prsc;
+ enum pipe_format pfmt;
+ enum a6xx_tex_type type;
+ bool srgb;
+ uint32_t cpp;
+ uint32_t level;
+ uint32_t width;
+ uint32_t height;
+ uint32_t depth;
+ uint32_t pitch;
+ uint32_t array_pitch;
+ struct fd_bo *bo;
+ uint32_t ubwc_offset;
+ uint32_t offset;
+ bool buffer;
};
-static void translate_image(struct fd6_image *img, const struct pipe_image_view *pimg)
+static void
+translate_image(struct fd6_image *img, const struct pipe_image_view *pimg)
{
- enum pipe_format format = pimg->format;
- struct pipe_resource *prsc = pimg->resource;
- struct fd_resource *rsc = fd_resource(prsc);
-
- if (!prsc) {
- memset(img, 0, sizeof(*img));
- return;
- }
-
- img->prsc = prsc;
- img->pfmt = format;
- img->type = fd6_tex_type(prsc->target);
- img->srgb = util_format_is_srgb(format);
- img->cpp = rsc->layout.cpp;
- img->bo = rsc->bo;
-
- /* Treat cube textures as 2d-array: */
- if (img->type == A6XX_TEX_CUBE)
- img->type = A6XX_TEX_2D;
-
- if (prsc->target == PIPE_BUFFER) {
- img->buffer = true;
- img->ubwc_offset = 0; /* not valid for buffers */
- img->offset = pimg->u.buf.offset;
- img->pitch = 0;
- img->array_pitch = 0;
-
- /* size is encoded with low 15b in WIDTH and high bits in
- * HEIGHT, in units of elements:
- */
- unsigned sz = pimg->u.buf.size / util_format_get_blocksize(format);
- img->width = sz & MASK(15);
- img->height = sz >> 15;
- img->depth = 0;
- img->level = 0;
- } else {
- img->buffer = false;
-
- unsigned lvl = pimg->u.tex.level;
- unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
-
- img->ubwc_offset = fd_resource_ubwc_offset(rsc, lvl, pimg->u.tex.first_layer);
- img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
- img->pitch = fd_resource_pitch(rsc, lvl);
-
- switch (prsc->target) {
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_2D:
- img->array_pitch = rsc->layout.layer_size;
- img->depth = 1;
- break;
- case PIPE_TEXTURE_1D_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- img->array_pitch = rsc->layout.layer_size;
- // TODO the CUBE/CUBE_ARRAY might need to be layers/6 for tex state,
- // but empirically for ibo state it shouldn't be divided.
- img->depth = layers;
- break;
- case PIPE_TEXTURE_3D:
- img->array_pitch = fd_resource_slice(rsc, lvl)->size0;
- img->depth = u_minify(prsc->depth0, lvl);
- break;
- default:
- break;
- }
-
- img->level = lvl;
- img->width = u_minify(prsc->width0, lvl);
- img->height = u_minify(prsc->height0, lvl);
- }
+ enum pipe_format format = pimg->format;
+ struct pipe_resource *prsc = pimg->resource;
+ struct fd_resource *rsc = fd_resource(prsc);
+
+ if (!prsc) {
+ memset(img, 0, sizeof(*img));
+ return;
+ }
+
+ img->prsc = prsc;
+ img->pfmt = format;
+ img->type = fd6_tex_type(prsc->target);
+ img->srgb = util_format_is_srgb(format);
+ img->cpp = rsc->layout.cpp;
+ img->bo = rsc->bo;
+
+ /* Treat cube textures as 2d-array: */
+ if (img->type == A6XX_TEX_CUBE)
+ img->type = A6XX_TEX_2D;
+
+ if (prsc->target == PIPE_BUFFER) {
+ img->buffer = true;
+ img->ubwc_offset = 0; /* not valid for buffers */
+ img->offset = pimg->u.buf.offset;
+ img->pitch = 0;
+ img->array_pitch = 0;
+
+ /* size is encoded with low 15b in WIDTH and high bits in
+ * HEIGHT, in units of elements:
+ */
+ unsigned sz = pimg->u.buf.size / util_format_get_blocksize(format);
+ img->width = sz & MASK(15);
+ img->height = sz >> 15;
+ img->depth = 0;
+ img->level = 0;
+ } else {
+ img->buffer = false;
+
+ unsigned lvl = pimg->u.tex.level;
+ unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
+
+ img->ubwc_offset =
+ fd_resource_ubwc_offset(rsc, lvl, pimg->u.tex.first_layer);
+ img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
+ img->pitch = fd_resource_pitch(rsc, lvl);
+
+ switch (prsc->target) {
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_2D:
+ img->array_pitch = rsc->layout.layer_size;
+ img->depth = 1;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ img->array_pitch = rsc->layout.layer_size;
+ // TODO the CUBE/CUBE_ARRAY might need to be layers/6 for tex state,
+ // but empirically for ibo state it shouldn't be divided.
+ img->depth = layers;
+ break;
+ case PIPE_TEXTURE_3D:
+ img->array_pitch = fd_resource_slice(rsc, lvl)->size0;
+ img->depth = u_minify(prsc->depth0, lvl);
+ break;
+ default:
+ break;
+ }
+
+ img->level = lvl;
+ img->width = u_minify(prsc->width0, lvl);
+ img->height = u_minify(prsc->height0, lvl);
+ }
}
-static void translate_buf(struct fd6_image *img, const struct pipe_shader_buffer *pimg)
+static void
+translate_buf(struct fd6_image *img, const struct pipe_shader_buffer *pimg)
{
- enum pipe_format format = PIPE_FORMAT_R32_UINT;
- struct pipe_resource *prsc = pimg->buffer;
- struct fd_resource *rsc = fd_resource(prsc);
-
- if (!prsc) {
- memset(img, 0, sizeof(*img));
- return;
- }
-
- img->prsc = prsc;
- img->pfmt = format;
- img->type = fd6_tex_type(prsc->target);
- img->srgb = util_format_is_srgb(format);
- img->cpp = rsc->layout.cpp;
- img->bo = rsc->bo;
- img->buffer = true;
-
- img->ubwc_offset = 0; /* not valid for buffers */
- img->offset = pimg->buffer_offset;
- img->pitch = 0;
- img->array_pitch = 0;
- img->level = 0;
-
- /* size is encoded with low 15b in WIDTH and high bits in HEIGHT,
- * in units of elements:
- */
- unsigned sz = pimg->buffer_size / 4;
- img->width = sz & MASK(15);
- img->height = sz >> 15;
- img->depth = 0;
+ enum pipe_format format = PIPE_FORMAT_R32_UINT;
+ struct pipe_resource *prsc = pimg->buffer;
+ struct fd_resource *rsc = fd_resource(prsc);
+
+ if (!prsc) {
+ memset(img, 0, sizeof(*img));
+ return;
+ }
+
+ img->prsc = prsc;
+ img->pfmt = format;
+ img->type = fd6_tex_type(prsc->target);
+ img->srgb = util_format_is_srgb(format);
+ img->cpp = rsc->layout.cpp;
+ img->bo = rsc->bo;
+ img->buffer = true;
+
+ img->ubwc_offset = 0; /* not valid for buffers */
+ img->offset = pimg->buffer_offset;
+ img->pitch = 0;
+ img->array_pitch = 0;
+ img->level = 0;
+
+ /* size is encoded with low 15b in WIDTH and high bits in HEIGHT,
+ * in units of elements:
+ */
+ unsigned sz = pimg->buffer_size / 4;
+ img->width = sz & MASK(15);
+ img->height = sz >> 15;
+ img->depth = 0;
}
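/* Editorial sketch (not driver code): the WIDTH/HEIGHT split used by
 * translate_buf()/translate_image() for buffer sizes, shown as a
 * standalone round-trip with a local stand-in for mesa's MASK() macro.
 */
#include <assert.h>
#include <stdint.h>

#define SIZE_MASK(n) ((1u << (n)) - 1) /* stand-in for MASK(n) */

/* split an element count into the low-15-bit WIDTH field and the
 * remaining high bits in HEIGHT, as done above for PIPE_BUFFER
 */
static void
split_buffer_size(uint32_t num_elements, uint32_t *width, uint32_t *height)
{
   *width = num_elements & SIZE_MASK(15);
   *height = num_elements >> 15;
}

int
main(void)
{
   uint32_t w, h;
   split_buffer_size(100000, &w, &h); /* 100000 = 3 * 32768 + 1696 */
   assert(w == 1696 && h == 3);
   assert(((h << 15) | w) == 100000); /* round-trips */
   return 0;
}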
-static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img)
+static void
+emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img)
{
- struct fd_resource *rsc = fd_resource(img->prsc);
- bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, img->level);
-
- OUT_RING(ring, fd6_tex_const_0(img->prsc, img->level, img->pfmt,
- PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
- PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
- OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) |
- A6XX_TEX_CONST_1_HEIGHT(img->height));
- OUT_RING(ring,
- COND(img->buffer, A6XX_TEX_CONST_2_UNK4 | A6XX_TEX_CONST_2_UNK31) |
- A6XX_TEX_CONST_2_TYPE(img->type) |
- A6XX_TEX_CONST_2_PITCH(img->pitch));
- OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch) |
- COND(ubwc_enabled, A6XX_TEX_CONST_3_FLAG) |
- COND(rsc->layout.tile_all, A6XX_TEX_CONST_3_TILE_ALL));
- if (img->bo) {
- OUT_RELOC(ring, img->bo, img->offset,
- (uint64_t)A6XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
- } else {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, A6XX_TEX_CONST_5_DEPTH(img->depth));
- }
-
- OUT_RING(ring, 0x00000000); /* texconst6 */
-
- if (ubwc_enabled) {
- uint32_t block_width, block_height;
- fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
-
- OUT_RELOC(ring, rsc->bo, img->ubwc_offset, 0, 0);
- OUT_RING(ring, A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2));
- OUT_RING(ring,
- A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(fdl_ubwc_pitch(&rsc->layout, img->level)) |
- A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(img->width, block_width))) |
- A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(img->height, block_height))));
- } else {
- OUT_RING(ring, 0x00000000); /* texconst7 */
- OUT_RING(ring, 0x00000000); /* texconst8 */
- OUT_RING(ring, 0x00000000); /* texconst9 */
- OUT_RING(ring, 0x00000000); /* texconst10 */
- }
-
- OUT_RING(ring, 0x00000000); /* texconst11 */
- OUT_RING(ring, 0x00000000); /* texconst12 */
- OUT_RING(ring, 0x00000000); /* texconst13 */
- OUT_RING(ring, 0x00000000); /* texconst14 */
- OUT_RING(ring, 0x00000000); /* texconst15 */
+ struct fd_resource *rsc = fd_resource(img->prsc);
+ bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, img->level);
+
+ OUT_RING(ring,
+ fd6_tex_const_0(img->prsc, img->level, img->pfmt, PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
+ OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) |
+ A6XX_TEX_CONST_1_HEIGHT(img->height));
+ OUT_RING(ring,
+ COND(img->buffer, A6XX_TEX_CONST_2_UNK4 | A6XX_TEX_CONST_2_UNK31) |
+ A6XX_TEX_CONST_2_TYPE(img->type) |
+ A6XX_TEX_CONST_2_PITCH(img->pitch));
+ OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch) |
+ COND(ubwc_enabled, A6XX_TEX_CONST_3_FLAG) |
+ COND(rsc->layout.tile_all, A6XX_TEX_CONST_3_TILE_ALL));
+ if (img->bo) {
+ OUT_RELOC(ring, img->bo, img->offset,
+ (uint64_t)A6XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, A6XX_TEX_CONST_5_DEPTH(img->depth));
+ }
+
+ OUT_RING(ring, 0x00000000); /* texconst6 */
+
+ if (ubwc_enabled) {
+ uint32_t block_width, block_height;
+ fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
+
+ OUT_RELOC(ring, rsc->bo, img->ubwc_offset, 0, 0);
+ OUT_RING(ring, A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(
+ rsc->layout.ubwc_layer_size >> 2));
+ OUT_RING(ring, A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(
+ fdl_ubwc_pitch(&rsc->layout, img->level)) |
+ A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(
+ DIV_ROUND_UP(img->width, block_width))) |
+ A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(
+ DIV_ROUND_UP(img->height, block_height))));
+ } else {
+ OUT_RING(ring, 0x00000000); /* texconst7 */
+ OUT_RING(ring, 0x00000000); /* texconst8 */
+ OUT_RING(ring, 0x00000000); /* texconst9 */
+ OUT_RING(ring, 0x00000000); /* texconst10 */
+ }
+
+ OUT_RING(ring, 0x00000000); /* texconst11 */
+ OUT_RING(ring, 0x00000000); /* texconst12 */
+ OUT_RING(ring, 0x00000000); /* texconst13 */
+ OUT_RING(ring, 0x00000000); /* texconst14 */
+ OUT_RING(ring, 0x00000000); /* texconst15 */
}
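/* Editorial sketch: the FLAG_BUFFER_LOGW/LOGH fields above are the
 * log2-ceil of the image size in UBWC blocks. Local equivalents of
 * DIV_ROUND_UP() and util_logbase2_ceil() are used for illustration;
 * the 16px block width is a made-up example value.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t
div_round_up(uint32_t a, uint32_t b)
{
   return (a + b - 1) / b;
}

static uint32_t
log2_ceil(uint32_t v)
{
   return (v <= 1) ? 0 : 32 - __builtin_clz(v - 1);
}

int
main(void)
{
   uint32_t blocks = div_round_up(1000, 16); /* 1000px / 16px blocks = 63 */
   assert(log2_ceil(blocks) == 6);           /* 2^6 = 64 >= 63 */
   return 0;
}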
void
-fd6_emit_image_tex(struct fd_ringbuffer *ring, const struct pipe_image_view *pimg)
+fd6_emit_image_tex(struct fd_ringbuffer *ring,
+ const struct pipe_image_view *pimg)
{
- struct fd6_image img;
- translate_image(&img, pimg);
- emit_image_tex(ring, &img);
+ struct fd6_image img;
+ translate_image(&img, pimg);
+ emit_image_tex(ring, &img);
}
void
-fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *pbuf)
+fd6_emit_ssbo_tex(struct fd_ringbuffer *ring,
+ const struct pipe_shader_buffer *pbuf)
{
- struct fd6_image img;
- translate_buf(&img, pbuf);
- emit_image_tex(ring, &img);
+ struct fd6_image img;
+ translate_buf(&img, pbuf);
+ emit_image_tex(ring, &img);
}
-static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img)
+static void
+emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img)
{
- /* If the SSBO isn't present (becasue gallium doesn't pack atomic
- * counters), zero-fill the slot.
- */
- if (!img->prsc) {
- for (int i = 0; i < 16; i++)
- OUT_RING(ring, 0);
- return;
- }
-
- struct fd_resource *rsc = fd_resource(img->prsc);
- enum a6xx_tile_mode tile_mode = fd_resource_tile_mode(img->prsc, img->level);
- bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, img->level);
-
- OUT_RING(ring, A6XX_IBO_0_FMT(fd6_pipe2tex(img->pfmt)) |
- A6XX_IBO_0_TILE_MODE(tile_mode));
- OUT_RING(ring, A6XX_IBO_1_WIDTH(img->width) |
- A6XX_IBO_1_HEIGHT(img->height));
- OUT_RING(ring, A6XX_IBO_2_PITCH(img->pitch) |
- COND(img->buffer, A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31) |
- A6XX_IBO_2_TYPE(img->type));
- OUT_RING(ring, A6XX_IBO_3_ARRAY_PITCH(img->array_pitch) |
- COND(ubwc_enabled, A6XX_IBO_3_FLAG | A6XX_IBO_3_UNK27));
- if (img->bo) {
- OUT_RELOC(ring, img->bo, img->offset,
- (uint64_t)A6XX_IBO_5_DEPTH(img->depth) << 32, 0);
- } else {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, A6XX_IBO_5_DEPTH(img->depth));
- }
- OUT_RING(ring, 0x00000000);
-
- if (ubwc_enabled) {
- OUT_RELOC(ring, rsc->bo, img->ubwc_offset, 0, 0);
- OUT_RING(ring, A6XX_IBO_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2));
- OUT_RING(ring, A6XX_IBO_10_FLAG_BUFFER_PITCH(fdl_ubwc_pitch(&rsc->layout, img->level)));
- } else {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
-
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+   /* If the SSBO isn't present (because gallium doesn't pack atomic
+ * counters), zero-fill the slot.
+ */
+ if (!img->prsc) {
+ for (int i = 0; i < 16; i++)
+ OUT_RING(ring, 0);
+ return;
+ }
+
+ struct fd_resource *rsc = fd_resource(img->prsc);
+ enum a6xx_tile_mode tile_mode = fd_resource_tile_mode(img->prsc, img->level);
+ bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, img->level);
+
+ OUT_RING(ring, A6XX_IBO_0_FMT(fd6_pipe2tex(img->pfmt)) |
+ A6XX_IBO_0_TILE_MODE(tile_mode));
+ OUT_RING(ring,
+ A6XX_IBO_1_WIDTH(img->width) | A6XX_IBO_1_HEIGHT(img->height));
+ OUT_RING(ring, A6XX_IBO_2_PITCH(img->pitch) |
+ COND(img->buffer, A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31) |
+ A6XX_IBO_2_TYPE(img->type));
+ OUT_RING(ring, A6XX_IBO_3_ARRAY_PITCH(img->array_pitch) |
+ COND(ubwc_enabled, A6XX_IBO_3_FLAG | A6XX_IBO_3_UNK27));
+ if (img->bo) {
+ OUT_RELOC(ring, img->bo, img->offset,
+ (uint64_t)A6XX_IBO_5_DEPTH(img->depth) << 32, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, A6XX_IBO_5_DEPTH(img->depth));
+ }
+ OUT_RING(ring, 0x00000000);
+
+ if (ubwc_enabled) {
+ OUT_RELOC(ring, rsc->bo, img->ubwc_offset, 0, 0);
+ OUT_RING(ring, A6XX_IBO_9_FLAG_BUFFER_ARRAY_PITCH(
+ rsc->layout.ubwc_layer_size >> 2));
+ OUT_RING(ring, A6XX_IBO_10_FLAG_BUFFER_PITCH(
+ fdl_ubwc_pitch(&rsc->layout, img->level)));
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
}
/* Build combined image/SSBO "IBO" state, returns ownership of state reference */
struct fd_ringbuffer *
fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v,
- enum pipe_shader_type shader)
+ enum pipe_shader_type shader)
{
- struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader];
- struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader];
-
- struct fd_ringbuffer *state =
- fd_submit_new_ringbuffer(ctx->batch->submit,
- (v->shader->nir->info.num_ssbos +
- v->shader->nir->info.num_images) * 16 * 4,
- FD_RINGBUFFER_STREAMING);
-
- assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
-
- for (unsigned i = 0; i < v->shader->nir->info.num_ssbos; i++) {
- struct fd6_image img;
- translate_buf(&img, &bufso->sb[i]);
- emit_image_ssbo(state, &img);
- }
-
- for (unsigned i = 0; i < v->shader->nir->info.num_images; i++) {
- struct fd6_image img;
- translate_image(&img, &imgso->si[i]);
- emit_image_ssbo(state, &img);
- }
-
- return state;
+ struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader];
+ struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader];
+
+ struct fd_ringbuffer *state = fd_submit_new_ringbuffer(
+ ctx->batch->submit,
+ (v->shader->nir->info.num_ssbos + v->shader->nir->info.num_images) * 16 *
+ 4,
+ FD_RINGBUFFER_STREAMING);
+
+ assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
+
+ for (unsigned i = 0; i < v->shader->nir->info.num_ssbos; i++) {
+ struct fd6_image img;
+ translate_buf(&img, &bufso->sb[i]);
+ emit_image_ssbo(state, &img);
+ }
+
+ for (unsigned i = 0; i < v->shader->nir->info.num_images; i++) {
+ struct fd6_image img;
+ translate_image(&img, &imgso->si[i]);
+ emit_image_ssbo(state, &img);
+ }
+
+ return state;
}
-static void fd6_set_shader_images(struct pipe_context *pctx,
- enum pipe_shader_type shader,
- unsigned start, unsigned count,
- unsigned unbind_num_trailing_slots,
- const struct pipe_image_view *images)
- in_dt
+static void
+fd6_set_shader_images(struct pipe_context *pctx, enum pipe_shader_type shader,
+ unsigned start, unsigned count,
+ unsigned unbind_num_trailing_slots,
+ const struct pipe_image_view *images) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
- fd_set_shader_images(pctx, shader, start, count,
- unbind_num_trailing_slots, images);
+ fd_set_shader_images(pctx, shader, start, count, unbind_num_trailing_slots,
+ images);
- if (!images)
- return;
+ if (!images)
+ return;
- for (unsigned i = 0; i < count; i++) {
- unsigned n = i + start;
- struct pipe_image_view *buf = &so->si[n];
+ for (unsigned i = 0; i < count; i++) {
+ unsigned n = i + start;
+ struct pipe_image_view *buf = &so->si[n];
- if (!buf->resource)
- continue;
+ if (!buf->resource)
+ continue;
- fd6_validate_format(ctx, fd_resource(buf->resource), buf->format);
- }
+ fd6_validate_format(ctx, fd_resource(buf->resource), buf->format);
+ }
}
void
fd6_image_init(struct pipe_context *pctx)
{
- pctx->set_shader_images = fd6_set_shader_images;
+ pctx->set_shader_images = fd6_set_shader_images;
}
#include "freedreno_context.h"
-void fd6_emit_image_tex(struct fd_ringbuffer *ring, const struct pipe_image_view *pimg) assert_dt;
-void fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *pbuf) assert_dt;
+void fd6_emit_image_tex(struct fd_ringbuffer *ring,
+ const struct pipe_image_view *pimg) assert_dt;
+void fd6_emit_ssbo_tex(struct fd_ringbuffer *ring,
+ const struct pipe_shader_buffer *pbuf) assert_dt;
struct ir3_shader_variant;
-struct fd_ringbuffer * fd6_build_ibo_state(struct fd_context *ctx,
- const struct ir3_shader_variant *v, enum pipe_shader_type shader) assert_dt;
+struct fd_ringbuffer *
+fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v,
+ enum pipe_shader_type shader) assert_dt;
void fd6_image_init(struct pipe_context *pctx);
#include "a6xx.xml.h"
struct fd_reg_pair {
- uint32_t reg;
- uint64_t value;
- struct fd_bo *bo;
- bool is_address;
- bool bo_write;
- uint32_t bo_offset;
- uint32_t bo_shift;
+ uint32_t reg;
+ uint64_t value;
+ struct fd_bo *bo;
+ bool is_address;
+ bool bo_write;
+ uint32_t bo_offset;
+ uint32_t bo_shift;
};
#define __bo_type struct fd_bo *
#include "a6xx-pack.xml.h"
#include "adreno-pm4-pack.xml.h"
-#define __assert_eq(a, b) \
- do { \
- if ((a) != (b)) { \
- fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, b); \
- assert((a) == (b)); \
- } \
- } while (0)
+#define __assert_eq(a, b) \
+ do { \
+ if ((a) != (b)) { \
+ fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, \
+ b); \
+ assert((a) == (b)); \
+ } \
+ } while (0)
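/* Editorial note: __assert_eq() exists so that a mismatch inside the
 * register-packing macros below prints both values before tripping the
 * normal assert(). A hypothetical standalone use:
 */
static inline void
check_consecutive_regs(uint32_t base, uint32_t next)
{
   /* on mismatch this prints e.g.
    *   assert failed: base + 1 (0x2) != next (0x5)
    * before asserting, which a bare assert() would not show
    */
   __assert_eq(base + 1, next);
}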
-#define __ONE_REG(i, ...) \
- do { \
- const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \
- /* NOTE: allow regs[0].reg==0, this happens in OUT_PKT() */ \
- if (i < ARRAY_SIZE(regs) && (i == 0 || regs[i].reg > 0)) { \
- __assert_eq(regs[0].reg + i, regs[i].reg); \
- if (regs[i].bo) { \
- ring->cur = p; \
- p += 2; \
- OUT_RELOC(ring, regs[i].bo, regs[i].bo_offset, \
- regs[i].value, regs[i].bo_shift); \
- } else { \
- *p++ = regs[i].value; \
- if (regs[i].is_address) \
- *p++ = regs[i].value >> 32; \
- } \
- } \
- } while (0)
+#define __ONE_REG(i, ...) \
+ do { \
+ const struct fd_reg_pair regs[] = {__VA_ARGS__}; \
+ /* NOTE: allow regs[0].reg==0, this happens in OUT_PKT() */ \
+ if (i < ARRAY_SIZE(regs) && (i == 0 || regs[i].reg > 0)) { \
+ __assert_eq(regs[0].reg + i, regs[i].reg); \
+ if (regs[i].bo) { \
+ ring->cur = p; \
+ p += 2; \
+ OUT_RELOC(ring, regs[i].bo, regs[i].bo_offset, regs[i].value, \
+ regs[i].bo_shift); \
+ } else { \
+ *p++ = regs[i].value; \
+ if (regs[i].is_address) \
+ *p++ = regs[i].value >> 32; \
+ } \
+ } \
+ } while (0)
-#define OUT_REG(ring, ...) \
- do { \
- const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \
- unsigned count = ARRAY_SIZE(regs); \
- \
- STATIC_ASSERT(count > 0); \
- STATIC_ASSERT(count <= 16); \
- \
- BEGIN_RING(ring, count + 1); \
- uint32_t *p = ring->cur; \
- *p++ = CP_TYPE4_PKT | count | \
- (_odd_parity_bit(count) << 7) | \
- ((regs[0].reg & 0x3ffff) << 8) | \
- ((_odd_parity_bit(regs[0].reg) << 27)); \
- \
- __ONE_REG( 0, __VA_ARGS__); \
- __ONE_REG( 1, __VA_ARGS__); \
- __ONE_REG( 2, __VA_ARGS__); \
- __ONE_REG( 3, __VA_ARGS__); \
- __ONE_REG( 4, __VA_ARGS__); \
- __ONE_REG( 5, __VA_ARGS__); \
- __ONE_REG( 6, __VA_ARGS__); \
- __ONE_REG( 7, __VA_ARGS__); \
- __ONE_REG( 8, __VA_ARGS__); \
- __ONE_REG( 9, __VA_ARGS__); \
- __ONE_REG(10, __VA_ARGS__); \
- __ONE_REG(11, __VA_ARGS__); \
- __ONE_REG(12, __VA_ARGS__); \
- __ONE_REG(13, __VA_ARGS__); \
- __ONE_REG(14, __VA_ARGS__); \
- __ONE_REG(15, __VA_ARGS__); \
- ring->cur = p; \
- } while (0)
+#define OUT_REG(ring, ...) \
+ do { \
+ const struct fd_reg_pair regs[] = {__VA_ARGS__}; \
+ unsigned count = ARRAY_SIZE(regs); \
+ \
+ STATIC_ASSERT(count > 0); \
+ STATIC_ASSERT(count <= 16); \
+ \
+ BEGIN_RING(ring, count + 1); \
+ uint32_t *p = ring->cur; \
+ *p++ = CP_TYPE4_PKT | count | (_odd_parity_bit(count) << 7) | \
+ ((regs[0].reg & 0x3ffff) << 8) | \
+ ((_odd_parity_bit(regs[0].reg) << 27)); \
+ \
+ __ONE_REG(0, __VA_ARGS__); \
+ __ONE_REG(1, __VA_ARGS__); \
+ __ONE_REG(2, __VA_ARGS__); \
+ __ONE_REG(3, __VA_ARGS__); \
+ __ONE_REG(4, __VA_ARGS__); \
+ __ONE_REG(5, __VA_ARGS__); \
+ __ONE_REG(6, __VA_ARGS__); \
+ __ONE_REG(7, __VA_ARGS__); \
+ __ONE_REG(8, __VA_ARGS__); \
+ __ONE_REG(9, __VA_ARGS__); \
+ __ONE_REG(10, __VA_ARGS__); \
+ __ONE_REG(11, __VA_ARGS__); \
+ __ONE_REG(12, __VA_ARGS__); \
+ __ONE_REG(13, __VA_ARGS__); \
+ __ONE_REG(14, __VA_ARGS__); \
+ __ONE_REG(15, __VA_ARGS__); \
+ ring->cur = p; \
+ } while (0)
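/* Editorial sketch of the type-4 packet header OUT_REG() builds by hand:
 * dword count in bits 0..6, a parity bit over the count in bit 7, the
 * 18-bit base register in bits 8..25, and a parity bit over the register
 * in bit 27. The header constant value and the odd-parity semantics of
 * _odd_parity_bit() are assumptions here, mirrored with local stand-ins.
 */
#include <stdint.h>

#define SKETCH_CP_TYPE4_PKT 0x40000000u /* assumed value of CP_TYPE4_PKT */

/* returns the bit that makes the overall parity of val odd (assumed to
 * match _odd_parity_bit() used above)
 */
static uint32_t
sketch_odd_parity_bit(uint32_t val)
{
   return (__builtin_popcount(val) & 1) ? 0 : 1;
}

static uint32_t
sketch_pkt4_header(uint32_t count, uint32_t regindx)
{
   return SKETCH_CP_TYPE4_PKT | count | (sketch_odd_parity_bit(count) << 7) |
          ((regindx & 0x3ffff) << 8) | (sketch_odd_parity_bit(regindx) << 27);
}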
-#define OUT_PKT(ring, opcode, ...) \
- do { \
- const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \
- unsigned count = ARRAY_SIZE(regs); \
- \
- STATIC_ASSERT(count <= 16); \
- \
- BEGIN_RING(ring, count + 1); \
- uint32_t *p = ring->cur; \
- *p++ = CP_TYPE7_PKT | count | \
- (_odd_parity_bit(count) << 15) | \
- ((opcode & 0x7f) << 16) | \
- ((_odd_parity_bit(opcode) << 23)); \
- \
- __ONE_REG( 0, __VA_ARGS__); \
- __ONE_REG( 1, __VA_ARGS__); \
- __ONE_REG( 2, __VA_ARGS__); \
- __ONE_REG( 3, __VA_ARGS__); \
- __ONE_REG( 4, __VA_ARGS__); \
- __ONE_REG( 5, __VA_ARGS__); \
- __ONE_REG( 6, __VA_ARGS__); \
- __ONE_REG( 7, __VA_ARGS__); \
- __ONE_REG( 8, __VA_ARGS__); \
- __ONE_REG( 9, __VA_ARGS__); \
- __ONE_REG(10, __VA_ARGS__); \
- __ONE_REG(11, __VA_ARGS__); \
- __ONE_REG(12, __VA_ARGS__); \
- __ONE_REG(13, __VA_ARGS__); \
- __ONE_REG(14, __VA_ARGS__); \
- __ONE_REG(15, __VA_ARGS__); \
- ring->cur = p; \
- } while (0)
+#define OUT_PKT(ring, opcode, ...) \
+ do { \
+ const struct fd_reg_pair regs[] = {__VA_ARGS__}; \
+ unsigned count = ARRAY_SIZE(regs); \
+ \
+ STATIC_ASSERT(count <= 16); \
+ \
+ BEGIN_RING(ring, count + 1); \
+ uint32_t *p = ring->cur; \
+ *p++ = CP_TYPE7_PKT | count | (_odd_parity_bit(count) << 15) | \
+ ((opcode & 0x7f) << 16) | ((_odd_parity_bit(opcode) << 23)); \
+ \
+ __ONE_REG(0, __VA_ARGS__); \
+ __ONE_REG(1, __VA_ARGS__); \
+ __ONE_REG(2, __VA_ARGS__); \
+ __ONE_REG(3, __VA_ARGS__); \
+ __ONE_REG(4, __VA_ARGS__); \
+ __ONE_REG(5, __VA_ARGS__); \
+ __ONE_REG(6, __VA_ARGS__); \
+ __ONE_REG(7, __VA_ARGS__); \
+ __ONE_REG(8, __VA_ARGS__); \
+ __ONE_REG(9, __VA_ARGS__); \
+ __ONE_REG(10, __VA_ARGS__); \
+ __ONE_REG(11, __VA_ARGS__); \
+ __ONE_REG(12, __VA_ARGS__); \
+ __ONE_REG(13, __VA_ARGS__); \
+ __ONE_REG(14, __VA_ARGS__); \
+ __ONE_REG(15, __VA_ARGS__); \
+ ring->cur = p; \
+ } while (0)
/* similar to OUT_PKT() but appends specified # of dwords
 * copied from buf to the end of the packet (ie. for use-
 * cases like CP_LOAD_STATE)
 */
-#define OUT_PKTBUF(ring, opcode, dwords, sizedwords, ...) \
- do { \
- const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \
- unsigned count = ARRAY_SIZE(regs); \
- \
- STATIC_ASSERT(count <= 16); \
- count += sizedwords; \
- \
- BEGIN_RING(ring, count + 1); \
- uint32_t *p = ring->cur; \
- *p++ = CP_TYPE7_PKT | count | \
- (_odd_parity_bit(count) << 15) | \
- ((opcode & 0x7f) << 16) | \
- ((_odd_parity_bit(opcode) << 23)); \
- \
- __ONE_REG( 0, __VA_ARGS__); \
- __ONE_REG( 1, __VA_ARGS__); \
- __ONE_REG( 2, __VA_ARGS__); \
- __ONE_REG( 3, __VA_ARGS__); \
- __ONE_REG( 4, __VA_ARGS__); \
- __ONE_REG( 5, __VA_ARGS__); \
- __ONE_REG( 6, __VA_ARGS__); \
- __ONE_REG( 7, __VA_ARGS__); \
- __ONE_REG( 8, __VA_ARGS__); \
- __ONE_REG( 9, __VA_ARGS__); \
- __ONE_REG(10, __VA_ARGS__); \
- __ONE_REG(11, __VA_ARGS__); \
- __ONE_REG(12, __VA_ARGS__); \
- __ONE_REG(13, __VA_ARGS__); \
- __ONE_REG(14, __VA_ARGS__); \
- __ONE_REG(15, __VA_ARGS__); \
- memcpy(p, dwords, 4 * sizedwords); \
- p += sizedwords; \
- ring->cur = p; \
- } while (0)
+#define OUT_PKTBUF(ring, opcode, dwords, sizedwords, ...) \
+ do { \
+ const struct fd_reg_pair regs[] = {__VA_ARGS__}; \
+ unsigned count = ARRAY_SIZE(regs); \
+ \
+ STATIC_ASSERT(count <= 16); \
+ count += sizedwords; \
+ \
+ BEGIN_RING(ring, count + 1); \
+ uint32_t *p = ring->cur; \
+ *p++ = CP_TYPE7_PKT | count | (_odd_parity_bit(count) << 15) | \
+ ((opcode & 0x7f) << 16) | ((_odd_parity_bit(opcode) << 23)); \
+ \
+ __ONE_REG(0, __VA_ARGS__); \
+ __ONE_REG(1, __VA_ARGS__); \
+ __ONE_REG(2, __VA_ARGS__); \
+ __ONE_REG(3, __VA_ARGS__); \
+ __ONE_REG(4, __VA_ARGS__); \
+ __ONE_REG(5, __VA_ARGS__); \
+ __ONE_REG(6, __VA_ARGS__); \
+ __ONE_REG(7, __VA_ARGS__); \
+ __ONE_REG(8, __VA_ARGS__); \
+ __ONE_REG(9, __VA_ARGS__); \
+ __ONE_REG(10, __VA_ARGS__); \
+ __ONE_REG(11, __VA_ARGS__); \
+ __ONE_REG(12, __VA_ARGS__); \
+ __ONE_REG(13, __VA_ARGS__); \
+ __ONE_REG(14, __VA_ARGS__); \
+ __ONE_REG(15, __VA_ARGS__); \
+ memcpy(p, dwords, 4 * sizedwords); \
+ p += sizedwords; \
+ ring->cur = p; \
+ } while (0)
#endif /* FD6_PACK_H */
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "util/format/u_format.h"
#include "util/bitset.h"
+#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "freedreno_program.h"
-#include "fd6_program.h"
#include "fd6_const.h"
#include "fd6_emit.h"
-#include "fd6_texture.h"
#include "fd6_format.h"
#include "fd6_pack.h"
+#include "fd6_program.h"
+#include "fd6_texture.h"
void
fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *so)
+ const struct ir3_shader_variant *so)
{
- enum a6xx_state_block sb = fd6_stage2shadersb(so->type);
-
- uint32_t first_exec_offset = 0;
- uint32_t instrlen = 0;
-
- switch (so->type) {
- case MESA_SHADER_VERTEX:
- first_exec_offset = REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET;
- instrlen = REG_A6XX_SP_VS_INSTRLEN;
- break;
- case MESA_SHADER_TESS_CTRL:
- first_exec_offset = REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET;
- instrlen = REG_A6XX_SP_HS_INSTRLEN;
- break;
- case MESA_SHADER_TESS_EVAL:
- first_exec_offset = REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET;
- instrlen = REG_A6XX_SP_DS_INSTRLEN;
- break;
- case MESA_SHADER_GEOMETRY:
- first_exec_offset = REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET;
- instrlen = REG_A6XX_SP_GS_INSTRLEN;
- break;
- case MESA_SHADER_FRAGMENT:
- first_exec_offset = REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET;
- instrlen = REG_A6XX_SP_FS_INSTRLEN;
- break;
- case MESA_SHADER_COMPUTE:
- case MESA_SHADER_KERNEL:
- first_exec_offset = REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET;
- instrlen = REG_A6XX_SP_CS_INSTRLEN;
- break;
- case MESA_SHADER_TASK:
- case MESA_SHADER_MESH:
- case MESA_SHADER_RAYGEN:
- case MESA_SHADER_ANY_HIT:
- case MESA_SHADER_CLOSEST_HIT:
- case MESA_SHADER_MISS:
- case MESA_SHADER_INTERSECTION:
- case MESA_SHADER_CALLABLE:
- unreachable("Unsupported shader stage");
- case MESA_SHADER_NONE:
- unreachable("");
- }
+ enum a6xx_state_block sb = fd6_stage2shadersb(so->type);
+
+ uint32_t first_exec_offset = 0;
+ uint32_t instrlen = 0;
+
+ switch (so->type) {
+ case MESA_SHADER_VERTEX:
+ first_exec_offset = REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET;
+ instrlen = REG_A6XX_SP_VS_INSTRLEN;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ first_exec_offset = REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET;
+ instrlen = REG_A6XX_SP_HS_INSTRLEN;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ first_exec_offset = REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET;
+ instrlen = REG_A6XX_SP_DS_INSTRLEN;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ first_exec_offset = REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET;
+ instrlen = REG_A6XX_SP_GS_INSTRLEN;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ first_exec_offset = REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET;
+ instrlen = REG_A6XX_SP_FS_INSTRLEN;
+ break;
+ case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_KERNEL:
+ first_exec_offset = REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET;
+ instrlen = REG_A6XX_SP_CS_INSTRLEN;
+ break;
+ case MESA_SHADER_TASK:
+ case MESA_SHADER_MESH:
+ case MESA_SHADER_RAYGEN:
+ case MESA_SHADER_ANY_HIT:
+ case MESA_SHADER_CLOSEST_HIT:
+ case MESA_SHADER_MISS:
+ case MESA_SHADER_INTERSECTION:
+ case MESA_SHADER_CALLABLE:
+ unreachable("Unsupported shader stage");
+ case MESA_SHADER_NONE:
+ unreachable("");
+ }
#ifdef DEBUG
- /* Name should generally match what you get with MESA_SHADER_CAPTURE_PATH: */
- const char *name = so->shader->nir->info.name;
- if (name)
- fd_emit_string5(ring, name, strlen(name));
+ /* Name should generally match what you get with MESA_SHADER_CAPTURE_PATH: */
+ const char *name = so->shader->nir->info.name;
+ if (name)
+ fd_emit_string5(ring, name, strlen(name));
#endif
- uint32_t fibers_per_sp = ctx->screen->info.fibers_per_sp;
- uint32_t num_sp_cores = ctx->screen->info.num_sp_cores;
-
- uint32_t per_fiber_size = ALIGN(so->pvtmem_size, 512);
- if (per_fiber_size > ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size) {
- if (ctx->pvtmem[so->pvtmem_per_wave].bo)
- fd_bo_del(ctx->pvtmem[so->pvtmem_per_wave].bo);
- ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size = per_fiber_size;
- uint32_t total_size = ALIGN(per_fiber_size * fibers_per_sp, 1 << 12)
- * num_sp_cores;
- ctx->pvtmem[so->pvtmem_per_wave].bo =
- fd_bo_new(ctx->screen->dev, total_size,
- DRM_FREEDRENO_GEM_TYPE_KMEM, "pvtmem_%s_%d",
- so->pvtmem_per_wave ? "per_wave" : "per_fiber",
- per_fiber_size);
- } else {
- per_fiber_size = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size;
- }
-
- uint32_t per_sp_size = ALIGN(per_fiber_size * fibers_per_sp, 1 << 12);
-
- OUT_PKT4(ring, instrlen, 1);
- OUT_RING(ring, so->instrlen);
-
- OUT_PKT4(ring, first_exec_offset, 7);
- OUT_RING(ring, 0); /* SP_xS_OBJ_FIRST_EXEC_OFFSET */
- OUT_RELOC(ring, so->bo, 0, 0, 0); /* SP_xS_OBJ_START_LO */
- OUT_RING(ring, A6XX_SP_VS_PVT_MEM_PARAM_MEMSIZEPERITEM(per_fiber_size));
- if (so->pvtmem_size > 0) { /* SP_xS_PVT_MEM_ADDR */
- OUT_RELOC(ring, ctx->pvtmem[so->pvtmem_per_wave].bo, 0, 0, 0);
- } else {
- OUT_RING(ring, 0);
- OUT_RING(ring, 0);
- }
- OUT_RING(ring, A6XX_SP_VS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(per_sp_size) |
- COND(so->pvtmem_per_wave, A6XX_SP_VS_PVT_MEM_SIZE_PERWAVEMEMLAYOUT));
-
- OUT_PKT7(ring, fd6_stage2opcode(so->type), 3);
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
- CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE6_0_NUM_UNIT(so->instrlen));
- OUT_RELOC(ring, so->bo, 0, 0, 0);
+ uint32_t fibers_per_sp = ctx->screen->info.fibers_per_sp;
+ uint32_t num_sp_cores = ctx->screen->info.num_sp_cores;
+
+ uint32_t per_fiber_size = ALIGN(so->pvtmem_size, 512);
+ if (per_fiber_size > ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size) {
+ if (ctx->pvtmem[so->pvtmem_per_wave].bo)
+ fd_bo_del(ctx->pvtmem[so->pvtmem_per_wave].bo);
+ ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size = per_fiber_size;
+ uint32_t total_size =
+ ALIGN(per_fiber_size * fibers_per_sp, 1 << 12) * num_sp_cores;
+ ctx->pvtmem[so->pvtmem_per_wave].bo = fd_bo_new(
+ ctx->screen->dev, total_size, DRM_FREEDRENO_GEM_TYPE_KMEM,
+ "pvtmem_%s_%d", so->pvtmem_per_wave ? "per_wave" : "per_fiber",
+ per_fiber_size);
+ } else {
+ per_fiber_size = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size;
+ }
+
+ uint32_t per_sp_size = ALIGN(per_fiber_size * fibers_per_sp, 1 << 12);
+
+ OUT_PKT4(ring, instrlen, 1);
+ OUT_RING(ring, so->instrlen);
+
+ OUT_PKT4(ring, first_exec_offset, 7);
+ OUT_RING(ring, 0); /* SP_xS_OBJ_FIRST_EXEC_OFFSET */
+ OUT_RELOC(ring, so->bo, 0, 0, 0); /* SP_xS_OBJ_START_LO */
+ OUT_RING(ring, A6XX_SP_VS_PVT_MEM_PARAM_MEMSIZEPERITEM(per_fiber_size));
+ if (so->pvtmem_size > 0) { /* SP_xS_PVT_MEM_ADDR */
+ OUT_RELOC(ring, ctx->pvtmem[so->pvtmem_per_wave].bo, 0, 0, 0);
+ } else {
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ }
+ OUT_RING(ring, A6XX_SP_VS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(per_sp_size) |
+ COND(so->pvtmem_per_wave,
+ A6XX_SP_VS_PVT_MEM_SIZE_PERWAVEMEMLAYOUT));
+
+ OUT_PKT7(ring, fd6_stage2opcode(so->type), 3);
+ OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE6_0_NUM_UNIT(so->instrlen));
+ OUT_RELOC(ring, so->bo, 0, 0, 0);
}
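/* Editorial sketch: the private-memory sizing in fd6_emit_shader() rounds
 * the per-fiber size up to 512B, scales by fibers per SP, rounds to a
 * 4 KiB page, then multiplies by the SP core count. Worked through with
 * made-up numbers (real values come from screen->info):
 */
#include <assert.h>
#include <stdint.h>

#define ALIGN_POT(v, a) (((v) + (a)-1) & ~((uint32_t)(a)-1)) /* stand-in for ALIGN() */

int
main(void)
{
   uint32_t pvtmem_size = 1000, fibers_per_sp = 2048, num_sp_cores = 2;

   uint32_t per_fiber = ALIGN_POT(pvtmem_size, 512);                /* 1024 */
   uint32_t per_sp = ALIGN_POT(per_fiber * fibers_per_sp, 1 << 12); /* 2 MiB */
   uint32_t total = per_sp * num_sp_cores;                          /* 4 MiB */

   assert(per_fiber == 1024 && per_sp == 2097152 && total == 4194304);
   return 0;
}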
-
static void
-setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_variant *v,
- struct ir3_shader_linkage *l)
+setup_stream_out(struct fd6_program_state *state,
+ const struct ir3_shader_variant *v,
+ struct ir3_shader_linkage *l)
{
- const struct ir3_stream_output_info *strmout = &v->shader->stream_output;
-
- uint32_t ncomp[PIPE_MAX_SO_BUFFERS];
- uint32_t prog[256/2];
- uint32_t prog_count;
-
- memset(ncomp, 0, sizeof(ncomp));
- memset(prog, 0, sizeof(prog));
-
- prog_count = align(l->max_loc, 2) / 2;
-
- debug_assert(prog_count < ARRAY_SIZE(prog));
-
- for (unsigned i = 0; i < strmout->num_outputs; i++) {
- const struct ir3_stream_output *out = &strmout->output[i];
- unsigned k = out->register_index;
- unsigned idx;
-
- ncomp[out->output_buffer] += out->num_components;
-
- /* linkage map sorted by order frag shader wants things, so
- * a bit less ideal here..
- */
- for (idx = 0; idx < l->cnt; idx++)
- if (l->var[idx].regid == v->outputs[k].regid)
- break;
-
- debug_assert(idx < l->cnt);
-
- for (unsigned j = 0; j < out->num_components; j++) {
- unsigned c = j + out->start_component;
- unsigned loc = l->var[idx].loc + c;
- unsigned off = j + out->dst_offset; /* in dwords */
-
- if (loc & 1) {
- prog[loc/2] |= A6XX_VPC_SO_PROG_B_EN |
- A6XX_VPC_SO_PROG_B_BUF(out->output_buffer) |
- A6XX_VPC_SO_PROG_B_OFF(off * 4);
- } else {
- prog[loc/2] |= A6XX_VPC_SO_PROG_A_EN |
- A6XX_VPC_SO_PROG_A_BUF(out->output_buffer) |
- A6XX_VPC_SO_PROG_A_OFF(off * 4);
- }
- }
- }
-
- struct fd_ringbuffer *ring = state->streamout_stateobj;
-
- OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 12 + (2 * prog_count));
- OUT_RING(ring, REG_A6XX_VPC_SO_STREAM_CNTL);
- OUT_RING(ring, A6XX_VPC_SO_STREAM_CNTL_STREAM_ENABLE(0x1) |
- COND(ncomp[0] > 0, A6XX_VPC_SO_STREAM_CNTL_BUF0_STREAM(1)) |
- COND(ncomp[1] > 0, A6XX_VPC_SO_STREAM_CNTL_BUF1_STREAM(1)) |
- COND(ncomp[2] > 0, A6XX_VPC_SO_STREAM_CNTL_BUF2_STREAM(1)) |
- COND(ncomp[3] > 0, A6XX_VPC_SO_STREAM_CNTL_BUF3_STREAM(1)));
- OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(0));
- OUT_RING(ring, ncomp[0]);
- OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(1));
- OUT_RING(ring, ncomp[1]);
- OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(2));
- OUT_RING(ring, ncomp[2]);
- OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(3));
- OUT_RING(ring, ncomp[3]);
- OUT_RING(ring, REG_A6XX_VPC_SO_CNTL);
- OUT_RING(ring, A6XX_VPC_SO_CNTL_RESET);
- for (unsigned i = 0; i < prog_count; i++) {
- OUT_RING(ring, REG_A6XX_VPC_SO_PROG);
- OUT_RING(ring, prog[i]);
- }
+ const struct ir3_stream_output_info *strmout = &v->shader->stream_output;
+
+ uint32_t ncomp[PIPE_MAX_SO_BUFFERS];
+ uint32_t prog[256 / 2];
+ uint32_t prog_count;
+
+ memset(ncomp, 0, sizeof(ncomp));
+ memset(prog, 0, sizeof(prog));
+
+ prog_count = align(l->max_loc, 2) / 2;
+
+ debug_assert(prog_count < ARRAY_SIZE(prog));
+
+ for (unsigned i = 0; i < strmout->num_outputs; i++) {
+ const struct ir3_stream_output *out = &strmout->output[i];
+ unsigned k = out->register_index;
+ unsigned idx;
+
+ ncomp[out->output_buffer] += out->num_components;
+
+ /* linkage map sorted by order frag shader wants things, so
+ * a bit less ideal here..
+ */
+ for (idx = 0; idx < l->cnt; idx++)
+ if (l->var[idx].regid == v->outputs[k].regid)
+ break;
+
+ debug_assert(idx < l->cnt);
+
+ for (unsigned j = 0; j < out->num_components; j++) {
+ unsigned c = j + out->start_component;
+ unsigned loc = l->var[idx].loc + c;
+ unsigned off = j + out->dst_offset; /* in dwords */
+
+ if (loc & 1) {
+ prog[loc / 2] |= A6XX_VPC_SO_PROG_B_EN |
+ A6XX_VPC_SO_PROG_B_BUF(out->output_buffer) |
+ A6XX_VPC_SO_PROG_B_OFF(off * 4);
+ } else {
+ prog[loc / 2] |= A6XX_VPC_SO_PROG_A_EN |
+ A6XX_VPC_SO_PROG_A_BUF(out->output_buffer) |
+ A6XX_VPC_SO_PROG_A_OFF(off * 4);
+ }
+ }
+ }
+
+ struct fd_ringbuffer *ring = state->streamout_stateobj;
+
+ OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 12 + (2 * prog_count));
+ OUT_RING(ring, REG_A6XX_VPC_SO_STREAM_CNTL);
+ OUT_RING(ring,
+ A6XX_VPC_SO_STREAM_CNTL_STREAM_ENABLE(0x1) |
+ COND(ncomp[0] > 0, A6XX_VPC_SO_STREAM_CNTL_BUF0_STREAM(1)) |
+ COND(ncomp[1] > 0, A6XX_VPC_SO_STREAM_CNTL_BUF1_STREAM(1)) |
+ COND(ncomp[2] > 0, A6XX_VPC_SO_STREAM_CNTL_BUF2_STREAM(1)) |
+ COND(ncomp[3] > 0, A6XX_VPC_SO_STREAM_CNTL_BUF3_STREAM(1)));
+ OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(0));
+ OUT_RING(ring, ncomp[0]);
+ OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(1));
+ OUT_RING(ring, ncomp[1]);
+ OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(2));
+ OUT_RING(ring, ncomp[2]);
+ OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(3));
+ OUT_RING(ring, ncomp[3]);
+ OUT_RING(ring, REG_A6XX_VPC_SO_CNTL);
+ OUT_RING(ring, A6XX_VPC_SO_CNTL_RESET);
+ for (unsigned i = 0; i < prog_count; i++) {
+ OUT_RING(ring, REG_A6XX_VPC_SO_PROG);
+ OUT_RING(ring, prog[i]);
+ }
}
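/* Editorial sketch: each VPC_SO_PROG dword written above carries two
 * stream-out entries -- the "A" half for an even output location and the
 * "B" half for the odd location above it. The field builders here are
 * hypothetical stand-ins for the generated A6XX_VPC_SO_PROG_* macros
 * (bit positions invented for illustration).
 */
#include <stdint.h>

#define SO_PROG_A_EN     (1u << 0)
#define SO_PROG_A_BUF(b) ((uint32_t)(b) << 1)
#define SO_PROG_A_OFF(o) ((uint32_t)(o) << 3)
#define SO_PROG_B_EN     (1u << 16)
#define SO_PROG_B_BUF(b) ((uint32_t)(b) << 17)
#define SO_PROG_B_OFF(o) ((uint32_t)(o) << 19)

/* pack one output component: even locations land in the A half of
 * prog[loc / 2], odd locations in the B half, as in setup_stream_out()
 */
static void
pack_so_entry(uint32_t *prog, unsigned loc, unsigned buf, unsigned off_bytes)
{
   if (loc & 1)
      prog[loc / 2] |=
         SO_PROG_B_EN | SO_PROG_B_BUF(buf) | SO_PROG_B_OFF(off_bytes);
   else
      prog[loc / 2] |=
         SO_PROG_A_EN | SO_PROG_A_BUF(buf) | SO_PROG_A_OFF(off_bytes);
}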
static void
-setup_config_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state)
+setup_config_stateobj(struct fd_ringbuffer *ring,
+ struct fd6_program_state *state)
{
- OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
- .vs_state = true,
- .hs_state = true,
- .ds_state = true,
- .gs_state = true,
- .fs_state = true,
- .cs_state = true,
- .gfx_ibo = true,
- .cs_ibo = true,
- ));
-
- debug_assert(state->vs->constlen >= state->bs->constlen);
-
- OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4);
- OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(state->vs->constlen) |
- A6XX_HLSQ_VS_CNTL_ENABLED);
- OUT_RING(ring, COND(state->hs,
- A6XX_HLSQ_HS_CNTL_ENABLED |
- A6XX_HLSQ_HS_CNTL_CONSTLEN(state->hs->constlen)));
- OUT_RING(ring, COND(state->ds,
- A6XX_HLSQ_DS_CNTL_ENABLED |
- A6XX_HLSQ_DS_CNTL_CONSTLEN(state->ds->constlen)));
- OUT_RING(ring, COND(state->gs,
- A6XX_HLSQ_GS_CNTL_ENABLED |
- A6XX_HLSQ_GS_CNTL_CONSTLEN(state->gs->constlen)));
- OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL, 1);
- OUT_RING(ring, A6XX_HLSQ_FS_CNTL_CONSTLEN(state->fs->constlen) |
- A6XX_HLSQ_FS_CNTL_ENABLED);
-
- OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1);
- OUT_RING(ring, COND(state->vs, A6XX_SP_VS_CONFIG_ENABLED) |
- A6XX_SP_VS_CONFIG_NIBO(ir3_shader_nibo(state->vs)) |
- A6XX_SP_VS_CONFIG_NTEX(state->vs->num_samp) |
- A6XX_SP_VS_CONFIG_NSAMP(state->vs->num_samp));
-
- OUT_PKT4(ring, REG_A6XX_SP_HS_CONFIG, 1);
- OUT_RING(ring, COND(state->hs,
- A6XX_SP_HS_CONFIG_ENABLED |
- A6XX_SP_HS_CONFIG_NIBO(ir3_shader_nibo(state->hs)) |
- A6XX_SP_HS_CONFIG_NTEX(state->hs->num_samp) |
- A6XX_SP_HS_CONFIG_NSAMP(state->hs->num_samp)));
-
- OUT_PKT4(ring, REG_A6XX_SP_DS_CONFIG, 1);
- OUT_RING(ring, COND(state->ds,
- A6XX_SP_DS_CONFIG_ENABLED |
- A6XX_SP_DS_CONFIG_NIBO(ir3_shader_nibo(state->ds)) |
- A6XX_SP_DS_CONFIG_NTEX(state->ds->num_samp) |
- A6XX_SP_DS_CONFIG_NSAMP(state->ds->num_samp)));
-
- OUT_PKT4(ring, REG_A6XX_SP_GS_CONFIG, 1);
- OUT_RING(ring, COND(state->gs,
- A6XX_SP_GS_CONFIG_ENABLED |
- A6XX_SP_GS_CONFIG_NIBO(ir3_shader_nibo(state->gs)) |
- A6XX_SP_GS_CONFIG_NTEX(state->gs->num_samp) |
- A6XX_SP_GS_CONFIG_NSAMP(state->gs->num_samp)));
-
- OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 1);
- OUT_RING(ring, COND(state->fs, A6XX_SP_FS_CONFIG_ENABLED) |
- A6XX_SP_FS_CONFIG_NIBO(ir3_shader_nibo(state->fs)) |
- A6XX_SP_FS_CONFIG_NTEX(state->fs->num_samp) |
- A6XX_SP_FS_CONFIG_NSAMP(state->fs->num_samp));
-
- OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1);
- OUT_RING(ring, ir3_shader_nibo(state->fs));
+ OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
+ .ds_state = true, .gs_state = true,
+ .fs_state = true, .cs_state = true,
+ .gfx_ibo = true, .cs_ibo = true, ));
+
+ debug_assert(state->vs->constlen >= state->bs->constlen);
+
+ OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4);
+ OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(state->vs->constlen) |
+ A6XX_HLSQ_VS_CNTL_ENABLED);
+ OUT_RING(ring, COND(state->hs,
+ A6XX_HLSQ_HS_CNTL_ENABLED |
+ A6XX_HLSQ_HS_CNTL_CONSTLEN(state->hs->constlen)));
+ OUT_RING(ring, COND(state->ds,
+ A6XX_HLSQ_DS_CNTL_ENABLED |
+ A6XX_HLSQ_DS_CNTL_CONSTLEN(state->ds->constlen)));
+ OUT_RING(ring, COND(state->gs,
+ A6XX_HLSQ_GS_CNTL_ENABLED |
+ A6XX_HLSQ_GS_CNTL_CONSTLEN(state->gs->constlen)));
+ OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL, 1);
+ OUT_RING(ring, A6XX_HLSQ_FS_CNTL_CONSTLEN(state->fs->constlen) |
+ A6XX_HLSQ_FS_CNTL_ENABLED);
+
+ OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1);
+ OUT_RING(ring, COND(state->vs, A6XX_SP_VS_CONFIG_ENABLED) |
+ A6XX_SP_VS_CONFIG_NIBO(ir3_shader_nibo(state->vs)) |
+ A6XX_SP_VS_CONFIG_NTEX(state->vs->num_samp) |
+ A6XX_SP_VS_CONFIG_NSAMP(state->vs->num_samp));
+
+ OUT_PKT4(ring, REG_A6XX_SP_HS_CONFIG, 1);
+ OUT_RING(ring, COND(state->hs,
+ A6XX_SP_HS_CONFIG_ENABLED |
+ A6XX_SP_HS_CONFIG_NIBO(ir3_shader_nibo(state->hs)) |
+ A6XX_SP_HS_CONFIG_NTEX(state->hs->num_samp) |
+ A6XX_SP_HS_CONFIG_NSAMP(state->hs->num_samp)));
+
+ OUT_PKT4(ring, REG_A6XX_SP_DS_CONFIG, 1);
+ OUT_RING(ring, COND(state->ds,
+ A6XX_SP_DS_CONFIG_ENABLED |
+ A6XX_SP_DS_CONFIG_NIBO(ir3_shader_nibo(state->ds)) |
+ A6XX_SP_DS_CONFIG_NTEX(state->ds->num_samp) |
+ A6XX_SP_DS_CONFIG_NSAMP(state->ds->num_samp)));
+
+ OUT_PKT4(ring, REG_A6XX_SP_GS_CONFIG, 1);
+ OUT_RING(ring, COND(state->gs,
+ A6XX_SP_GS_CONFIG_ENABLED |
+ A6XX_SP_GS_CONFIG_NIBO(ir3_shader_nibo(state->gs)) |
+ A6XX_SP_GS_CONFIG_NTEX(state->gs->num_samp) |
+ A6XX_SP_GS_CONFIG_NSAMP(state->gs->num_samp)));
+
+ OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 1);
+ OUT_RING(ring, COND(state->fs, A6XX_SP_FS_CONFIG_ENABLED) |
+ A6XX_SP_FS_CONFIG_NIBO(ir3_shader_nibo(state->fs)) |
+ A6XX_SP_FS_CONFIG_NTEX(state->fs->num_samp) |
+ A6XX_SP_FS_CONFIG_NSAMP(state->fs->num_samp));
+
+ OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1);
+ OUT_RING(ring, ir3_shader_nibo(state->fs));
}
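
For readers outside the driver, the COND()/CONDREG() helpers used in the
register expressions above behave roughly as below; this is a paraphrase of
the freedreno utility macros for reading purposes, not their verbatim
definitions:

    #define COND(cond, val) ((cond) ? (val) : 0)
    #define VALIDREG(r)     ((r) != regid(63, 0)) /* regid(63, 0) == "no register" */
    #define CONDREG(r, val) COND(VALIDREG(r), (val))

Because COND() expands to a ternary, the value expression is only evaluated
when the condition holds, which is why expressions like
COND(state->hs, ... state->hs->constlen ...) are safe when state->hs is NULL.
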
static inline uint32_t
next_regid(uint32_t reg, uint32_t increment)
{
- if (VALIDREG(reg))
- return reg + increment;
- else
- return regid(63,0);
+ if (VALIDREG(reg))
+ return reg + increment;
+ else
+ return regid(63, 0);
}
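
A usage sketch for next_regid(): regid(63, 0) is the "no register" encoding,
so the helper propagates "unused" instead of inventing a neighboring
register. This is exactly how the gl_FragCoord zw pair is located further
down in this function:

    coord_regid   = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD);
    /* stays regid(63, 0) when FragCoord itself is unused: */
    zwcoord_regid = next_regid(coord_regid, 2);
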
static void
setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
- struct fd6_program_state *state, const struct ir3_shader_key *key,
- bool binning_pass)
- assert_dt
+ struct fd6_program_state *state,
+ const struct ir3_shader_key *key, bool binning_pass) assert_dt
{
- uint32_t pos_regid, psize_regid, color_regid[8], posz_regid;
- uint32_t clip0_regid, clip1_regid;
- uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
- uint32_t smask_in_regid, smask_regid;
- uint32_t stencilref_regid;
- uint32_t vertex_regid, instance_regid, layer_regid, primitive_regid;
- uint32_t hs_invocation_regid;
- uint32_t tess_coord_x_regid, tess_coord_y_regid, hs_patch_regid, ds_patch_regid;
- uint32_t ij_regid[IJ_COUNT];
- uint32_t gs_header_regid;
- enum a6xx_threadsize fssz;
- uint8_t psize_loc = ~0, pos_loc = ~0, layer_loc = ~0;
- uint8_t clip0_loc, clip1_loc;
- int i, j;
-
- static const struct ir3_shader_variant dummy_fs = {0};
- const struct ir3_shader_variant *vs = binning_pass ? state->bs : state->vs;
- const struct ir3_shader_variant *hs = state->hs;
- const struct ir3_shader_variant *ds = state->ds;
- const struct ir3_shader_variant *gs = state->gs;
- const struct ir3_shader_variant *fs = binning_pass ? &dummy_fs : state->fs;
-
- /* binning VS is wrong when GS is present, so use nonbinning VS
- * TODO: compile both binning VS/GS variants correctly
- */
- if (binning_pass && state->gs)
- vs = state->vs;
-
- bool sample_shading = fs->per_samp | key->sample_shading;
-
- fssz = fs->info.double_threadsize ? THREAD128 : THREAD64;
-
- pos_regid = ir3_find_output_regid(vs, VARYING_SLOT_POS);
- psize_regid = ir3_find_output_regid(vs, VARYING_SLOT_PSIZ);
- clip0_regid = ir3_find_output_regid(vs, VARYING_SLOT_CLIP_DIST0);
- clip1_regid = ir3_find_output_regid(vs, VARYING_SLOT_CLIP_DIST1);
- layer_regid = ir3_find_output_regid(vs, VARYING_SLOT_LAYER);
- vertex_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID);
- instance_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID);
-
- if (hs) {
- tess_coord_x_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD);
- tess_coord_y_regid = next_regid(tess_coord_x_regid, 1);
- hs_patch_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID);
- ds_patch_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID);
- hs_invocation_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3);
-
- pos_regid = ir3_find_output_regid(ds, VARYING_SLOT_POS);
- psize_regid = ir3_find_output_regid(ds, VARYING_SLOT_PSIZ);
- clip0_regid = ir3_find_output_regid(ds, VARYING_SLOT_CLIP_DIST0);
- clip1_regid = ir3_find_output_regid(ds, VARYING_SLOT_CLIP_DIST1);
- } else {
- tess_coord_x_regid = regid(63, 0);
- tess_coord_y_regid = regid(63, 0);
- hs_patch_regid = regid(63, 0);
- ds_patch_regid = regid(63, 0);
- hs_invocation_regid = regid(63, 0);
- }
-
- if (gs) {
- gs_header_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3);
- primitive_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID);
- pos_regid = ir3_find_output_regid(gs, VARYING_SLOT_POS);
- psize_regid = ir3_find_output_regid(gs, VARYING_SLOT_PSIZ);
- clip0_regid = ir3_find_output_regid(gs, VARYING_SLOT_CLIP_DIST0);
- clip1_regid = ir3_find_output_regid(gs, VARYING_SLOT_CLIP_DIST1);
- layer_regid = ir3_find_output_regid(gs, VARYING_SLOT_LAYER);
- } else {
- gs_header_regid = regid(63, 0);
- primitive_regid = regid(63, 0);
- }
-
- if (fs->color0_mrt) {
- color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
- color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
- ir3_find_output_regid(fs, FRAG_RESULT_COLOR);
- } else {
- color_regid[0] = ir3_find_output_regid(fs, FRAG_RESULT_DATA0);
- color_regid[1] = ir3_find_output_regid(fs, FRAG_RESULT_DATA1);
- color_regid[2] = ir3_find_output_regid(fs, FRAG_RESULT_DATA2);
- color_regid[3] = ir3_find_output_regid(fs, FRAG_RESULT_DATA3);
- color_regid[4] = ir3_find_output_regid(fs, FRAG_RESULT_DATA4);
- color_regid[5] = ir3_find_output_regid(fs, FRAG_RESULT_DATA5);
- color_regid[6] = ir3_find_output_regid(fs, FRAG_RESULT_DATA6);
- color_regid[7] = ir3_find_output_regid(fs, FRAG_RESULT_DATA7);
- }
-
- samp_id_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_ID);
- smask_in_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_MASK_IN);
- face_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE);
- coord_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD);
- zwcoord_regid = next_regid(coord_regid, 2);
- posz_regid = ir3_find_output_regid(fs, FRAG_RESULT_DEPTH);
- smask_regid = ir3_find_output_regid(fs, FRAG_RESULT_SAMPLE_MASK);
- stencilref_regid = ir3_find_output_regid(fs, FRAG_RESULT_STENCIL);
- for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
- ij_regid[i] = ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
-
- /* If we have pre-dispatch texture fetches, then ij_pix should not
- * be DCE'd, even if not actually used in the shader itself:
- */
- if (fs->num_sampler_prefetch > 0) {
- assert(VALIDREG(ij_regid[IJ_PERSP_PIXEL]));
- /* also, it seems like ij_pix is *required* to be r0.x */
- assert(ij_regid[IJ_PERSP_PIXEL] == regid(0, 0));
- }
-
- /* we can't write gl_SampleMask for !msaa.. if b0 is zero then we
- * end up masking the single sample!!
- */
- if (!key->msaa)
- smask_regid = regid(63, 0);
-
- /* we could probably divide this up into things that need to be
- * emitted if frag-prog is dirty vs if vert-prog is dirty..
- */
-
- OUT_PKT4(ring, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
- OUT_RING(ring, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) |
- A6XX_SP_FS_PREFETCH_CNTL_UNK4(regid(63, 0)) |
- 0x7000); // XXX
- for (int i = 0; i < fs->num_sampler_prefetch; i++) {
- const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
- OUT_RING(ring, A6XX_SP_FS_PREFETCH_CMD_SRC(prefetch->src) |
- A6XX_SP_FS_PREFETCH_CMD_SAMP_ID(prefetch->samp_id) |
- A6XX_SP_FS_PREFETCH_CMD_TEX_ID(prefetch->tex_id) |
- A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) |
- A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) |
- COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) |
- A6XX_SP_FS_PREFETCH_CMD_CMD(prefetch->cmd));
- }
-
- OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A9A8, 1);
- OUT_RING(ring, 0);
-
- OUT_PKT4(ring, REG_A6XX_SP_MODE_CONTROL, 1);
- OUT_RING(ring, A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4);
-
- bool fs_has_dual_src_color = !binning_pass &&
- fs->shader->nir->info.fs.color_is_dual_source;
-
- OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL0, 1);
- OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) |
- A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(smask_regid) |
- A6XX_SP_FS_OUTPUT_CNTL0_STENCILREF_REGID(stencilref_regid) |
- COND(fs_has_dual_src_color,
- A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
-
- OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1);
- OUT_RING(ring,
- A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) |
- A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vs->info.max_half_reg + 1) |
- COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack));
-
- fd6_emit_shader(ctx, ring, vs);
- fd6_emit_immediates(ctx->screen, vs, ring);
-
- struct ir3_shader_linkage l = {0};
- const struct ir3_shader_variant *last_shader = fd6_last_shader(state);
-
- bool do_streamout = (last_shader->shader->stream_output.num_outputs > 0);
- uint8_t clip_mask = last_shader->clip_mask, cull_mask = last_shader->cull_mask;
- uint8_t clip_cull_mask = clip_mask | cull_mask;
-
- /* If we have streamout, link against the real FS, rather than the
- * dummy FS used for binning pass state, to ensure the OUTLOC's
- * match. Depending on whether we end up doing sysmem or gmem,
- * the actual streamout could happen with either the binning pass
- * or draw pass program, but the same streamout stateobj is used
- * in either case:
- */
- ir3_link_shaders(&l, last_shader, do_streamout ? state->fs : fs, true);
-
- bool primid_passthru = l.primid_loc != 0xff;
- clip0_loc = l.clip0_loc;
- clip1_loc = l.clip1_loc;
-
- OUT_PKT4(ring, REG_A6XX_VPC_VAR_DISABLE(0), 4);
- OUT_RING(ring, ~l.varmask[0]); /* VPC_VAR[0].DISABLE */
- OUT_RING(ring, ~l.varmask[1]); /* VPC_VAR[1].DISABLE */
- OUT_RING(ring, ~l.varmask[2]); /* VPC_VAR[2].DISABLE */
- OUT_RING(ring, ~l.varmask[3]); /* VPC_VAR[3].DISABLE */
-
- /* Add stream out outputs after computing the VPC_VAR_DISABLE bitmask. */
- ir3_link_stream_out(&l, last_shader);
-
- if (VALIDREG(layer_regid)) {
- layer_loc = l.max_loc;
- ir3_link_add(&l, layer_regid, 0x1, l.max_loc);
- }
-
- if (VALIDREG(pos_regid)) {
- pos_loc = l.max_loc;
- ir3_link_add(&l, pos_regid, 0xf, l.max_loc);
- }
-
- if (VALIDREG(psize_regid)) {
- psize_loc = l.max_loc;
- ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
- }
-
- /* Handle the case where clip/cull distances aren't read by the FS. Make
- * sure to avoid adding an output with an empty writemask if the user
- * disables all the clip distances in the API so that the slot is unused.
- */
- if (clip0_loc == 0xff && VALIDREG(clip0_regid) && (clip_cull_mask & 0xf) != 0) {
- clip0_loc = l.max_loc;
- ir3_link_add(&l, clip0_regid, clip_cull_mask & 0xf, l.max_loc);
- }
-
- if (clip1_loc == 0xff && VALIDREG(clip1_regid) && (clip_cull_mask >> 4) != 0) {
- clip1_loc = l.max_loc;
- ir3_link_add(&l, clip1_regid, clip_cull_mask >> 4, l.max_loc);
- }
-
- /* If we have stream-out, we use the full shader for binning
- * pass, rather than the optimized binning pass one, so that we
- * have all the varying outputs available for xfb. So streamout
- * state should always be derived from the non-binning pass
- * program:
- */
- if (do_streamout && !binning_pass) {
- setup_stream_out(state, last_shader, &l);
- }
-
- debug_assert(l.cnt <= 32);
- if (gs)
- OUT_PKT4(ring, REG_A6XX_SP_GS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2));
- else if (ds)
- OUT_PKT4(ring, REG_A6XX_SP_DS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2));
- else
- OUT_PKT4(ring, REG_A6XX_SP_VS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2));
-
- for (j = 0; j < l.cnt; ) {
- uint32_t reg = 0;
-
- reg |= A6XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
- reg |= A6XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
- j++;
-
- reg |= A6XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
- reg |= A6XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
- j++;
-
- OUT_RING(ring, reg);
- }
-
- if (gs)
- OUT_PKT4(ring, REG_A6XX_SP_GS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4));
- else if (ds)
- OUT_PKT4(ring, REG_A6XX_SP_DS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4));
- else
- OUT_PKT4(ring, REG_A6XX_SP_VS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4));
-
- for (j = 0; j < l.cnt; ) {
- uint32_t reg = 0;
-
- reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
- reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
- reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
- reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);
-
- OUT_RING(ring, reg);
- }
-
- if (hs) {
- assert(vs->mergedregs == hs->mergedregs);
- OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1);
- OUT_RING(ring,
- A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) |
- A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(hs->info.max_half_reg + 1) |
- A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack));
-
- fd6_emit_shader(ctx, ring, hs);
- fd6_emit_immediates(ctx->screen, hs, ring);
- fd6_emit_link_map(ctx->screen, vs, hs, ring);
-
- OUT_PKT4(ring, REG_A6XX_SP_DS_CTRL_REG0, 1);
- OUT_RING(ring,
- A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) |
- A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(ds->info.max_half_reg + 1) |
- COND(ds->mergedregs, A6XX_SP_DS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack));
-
- fd6_emit_shader(ctx, ring, ds);
- fd6_emit_immediates(ctx->screen, ds, ring);
- fd6_emit_link_map(ctx->screen, hs, ds, ring);
-
- shader_info *hs_info = &hs->shader->nir->info;
- OUT_PKT4(ring, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
- OUT_RING(ring, hs_info->tess.tcs_vertices_out);
-
- /* Total attribute slots in HS incoming patch. */
- OUT_PKT4(ring, REG_A6XX_PC_HS_INPUT_SIZE, 1);
- OUT_RING(ring, hs_info->tess.tcs_vertices_out * vs->output_size / 4);
-
- const uint32_t wavesize = 64;
- const uint32_t max_wave_input_size = 64;
- const uint32_t patch_control_points = hs_info->tess.tcs_vertices_out;
-
- /* note: if HS is really just the VS extended, then this
- * should be by MAX2(patch_control_points, hs_info->tess.tcs_vertices_out)
- * however that doesn't match the blob, and fails some dEQP tests.
- */
- uint32_t prims_per_wave = wavesize / hs_info->tess.tcs_vertices_out;
- uint32_t max_prims_per_wave =
- max_wave_input_size * wavesize / (vs->output_size * patch_control_points);
- prims_per_wave = MIN2(prims_per_wave, max_prims_per_wave);
-
- uint32_t total_size = vs->output_size * patch_control_points * prims_per_wave;
- uint32_t wave_input_size = DIV_ROUND_UP(total_size, wavesize);
-
- OUT_PKT4(ring, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1);
- OUT_RING(ring, wave_input_size);
-
- shader_info *ds_info = &ds->shader->nir->info;
- OUT_PKT4(ring, REG_A6XX_PC_TESS_CNTL, 1);
- uint32_t output;
- if (ds_info->tess.point_mode)
- output = TESS_POINTS;
- else if (ds_info->tess.primitive_mode == GL_ISOLINES)
- output = TESS_LINES;
- else if (ds_info->tess.ccw)
- output = TESS_CCW_TRIS;
- else
- output = TESS_CW_TRIS;
-
- OUT_RING(ring, A6XX_PC_TESS_CNTL_SPACING(fd6_gl2spacing(ds_info->tess.spacing)) |
- A6XX_PC_TESS_CNTL_OUTPUT(output));
-
- OUT_PKT4(ring, REG_A6XX_VPC_DS_CLIP_CNTL, 1);
- OUT_RING(ring, A6XX_VPC_DS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
- A6XX_VPC_DS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
- A6XX_VPC_DS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));
-
- OUT_PKT4(ring, REG_A6XX_VPC_DS_LAYER_CNTL, 1);
- OUT_RING(ring, 0x0000ffff);
-
- OUT_PKT4(ring, REG_A6XX_GRAS_DS_LAYER_CNTL, 1);
- OUT_RING(ring, 0x0);
-
- OUT_PKT4(ring, REG_A6XX_GRAS_DS_CL_CNTL, 1);
- OUT_RING(ring, A6XX_GRAS_DS_CL_CNTL_CLIP_MASK(clip_mask) |
- A6XX_GRAS_DS_CL_CNTL_CULL_MASK(cull_mask));
-
- OUT_PKT4(ring, REG_A6XX_VPC_VS_PACK, 1);
- OUT_RING(ring, A6XX_VPC_VS_PACK_POSITIONLOC(pos_loc) |
- A6XX_VPC_VS_PACK_PSIZELOC(255) |
- A6XX_VPC_VS_PACK_STRIDE_IN_VPC(l.max_loc));
-
- OUT_PKT4(ring, REG_A6XX_VPC_DS_PACK, 1);
- OUT_RING(ring, A6XX_VPC_DS_PACK_POSITIONLOC(pos_loc) |
- A6XX_VPC_DS_PACK_PSIZELOC(psize_loc) |
- A6XX_VPC_DS_PACK_STRIDE_IN_VPC(l.max_loc));
-
- OUT_PKT4(ring, REG_A6XX_SP_DS_PRIMITIVE_CNTL, 1);
- OUT_RING(ring, A6XX_SP_DS_PRIMITIVE_CNTL_OUT(l.cnt));
-
- OUT_PKT4(ring, REG_A6XX_PC_DS_OUT_CNTL, 1);
- OUT_RING(ring, A6XX_PC_DS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) |
- CONDREG(psize_regid, A6XX_PC_DS_OUT_CNTL_PSIZE) |
- A6XX_PC_DS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
-
- } else {
- OUT_PKT4(ring, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1);
- OUT_RING(ring, 0);
- }
-
- OUT_PKT4(ring, REG_A6XX_SP_VS_PRIMITIVE_CNTL, 1);
- OUT_RING(ring, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(l.cnt));
-
- bool enable_varyings = fs->total_in > 0;
-
- OUT_PKT4(ring, REG_A6XX_VPC_CNTL_0, 1);
- OUT_RING(ring, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs->total_in) |
- COND(enable_varyings, A6XX_VPC_CNTL_0_VARYING) |
- A6XX_VPC_CNTL_0_PRIMIDLOC(l.primid_loc) |
- A6XX_VPC_CNTL_0_VIEWIDLOC(0xff));
-
- OUT_PKT4(ring, REG_A6XX_PC_VS_OUT_CNTL, 1);
- OUT_RING(ring, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) |
- CONDREG(psize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
- CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
- A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
-
- OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1);
- OUT_RING(ring, 0);
-
- OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5);
- OUT_RING(ring, 0x7); /* XXX */
- OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
- A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) |
- A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(smask_in_regid) |
- A6XX_HLSQ_CONTROL_2_REG_SIZE(ij_regid[IJ_PERSP_SIZE]));
- OUT_RING(ring,
- A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) |
- A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) |
- A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(ij_regid[IJ_PERSP_CENTROID]) |
- A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_CENTROID(ij_regid[IJ_LINEAR_CENTROID]));
- OUT_RING(ring, A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
- A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
- A6XX_HLSQ_CONTROL_4_REG_IJ_PERSP_SAMPLE(ij_regid[IJ_PERSP_SAMPLE]) |
- A6XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE]));
- OUT_RING(ring, 0xfc); /* XXX */
-
- OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL_0, 1);
- OUT_RING(ring, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(fssz) |
- COND(enable_varyings, A6XX_HLSQ_FS_CNTL_0_VARYINGS));
-
- OUT_PKT4(ring, REG_A6XX_SP_FS_CTRL_REG0, 1);
- OUT_RING(ring, A6XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
- COND(enable_varyings, A6XX_SP_FS_CTRL_REG0_VARYING) |
- 0x1000000 |
- A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) |
- A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fs->info.max_half_reg + 1) |
- COND(fs->mergedregs, A6XX_SP_FS_CTRL_REG0_MERGEDREGS) |
- A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(fs->branchstack) |
- COND(fs->need_pixlod, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
-
- OUT_PKT4(ring, REG_A6XX_VPC_VS_LAYER_CNTL, 1);
- OUT_RING(ring, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) |
- A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(0xff));
-
- bool need_size = fs->frag_face || fs->fragcoord_compmask != 0;
- bool need_size_persamp = false;
- if (VALIDREG(ij_regid[IJ_PERSP_SIZE])) {
- if (sample_shading)
- need_size_persamp = true;
- else
- need_size = true;
- }
- if (VALIDREG(ij_regid[IJ_LINEAR_PIXEL]))
- need_size = true;
-
- /* XXX: enable bits for linear centroid and linear sample bary */
-
- OUT_PKT4(ring, REG_A6XX_GRAS_CNTL, 1);
- OUT_RING(ring,
- CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_GRAS_CNTL_IJ_PERSP_PIXEL) |
- CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_GRAS_CNTL_IJ_PERSP_CENTROID) |
- CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_GRAS_CNTL_IJ_PERSP_SAMPLE) |
- COND(need_size, A6XX_GRAS_CNTL_SIZE) |
- COND(need_size_persamp, A6XX_GRAS_CNTL_SIZE_PERSAMP) |
- COND(fs->fragcoord_compmask != 0, A6XX_GRAS_CNTL_COORD_MASK(fs->fragcoord_compmask)));
-
- OUT_PKT4(ring, REG_A6XX_RB_RENDER_CONTROL0, 2);
- OUT_RING(ring,
- CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_PIXEL) |
- CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_CENTROID) |
- CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_SAMPLE) |
- COND(need_size, A6XX_RB_RENDER_CONTROL0_SIZE) |
- COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_UNK10) |
- COND(need_size_persamp, A6XX_RB_RENDER_CONTROL0_SIZE_PERSAMP) |
- COND(fs->fragcoord_compmask != 0,
- A6XX_RB_RENDER_CONTROL0_COORD_MASK(fs->fragcoord_compmask)));
-
- OUT_RING(ring,
- CONDREG(smask_in_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) |
- CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) |
- CONDREG(ij_regid[IJ_PERSP_SIZE], A6XX_RB_RENDER_CONTROL1_SIZE) |
- COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS));
-
- OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_CNTL, 1);
- OUT_RING(ring, COND(sample_shading, A6XX_RB_SAMPLE_CNTL_PER_SAMP_MODE));
-
- OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_8101, 1);
- OUT_RING(ring, COND(sample_shading, 0x6)); // XXX
-
- OUT_PKT4(ring, REG_A6XX_GRAS_SAMPLE_CNTL, 1);
- OUT_RING(ring, COND(sample_shading, A6XX_GRAS_SAMPLE_CNTL_PER_SAMP_MODE));
-
- OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_REG(0), 8);
- for (i = 0; i < 8; i++) {
- OUT_RING(ring, A6XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
- COND(color_regid[i] & HALF_REG_ID, A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
- if (VALIDREG(color_regid[i])) {
- state->mrt_components |= 0xf << (i * 4);
- }
- }
-
- /* dual source blending has an extra fs output in the 2nd slot */
- if (fs_has_dual_src_color) {
- state->mrt_components |= 0xf << 4;
- }
-
- OUT_PKT4(ring, REG_A6XX_VPC_VS_PACK, 1);
- OUT_RING(ring, A6XX_VPC_VS_PACK_POSITIONLOC(pos_loc) |
- A6XX_VPC_VS_PACK_PSIZELOC(psize_loc) |
- A6XX_VPC_VS_PACK_STRIDE_IN_VPC(l.max_loc));
-
- if (gs) {
- assert(gs->mergedregs == (ds ? ds->mergedregs : vs->mergedregs));
- OUT_PKT4(ring, REG_A6XX_SP_GS_CTRL_REG0, 1);
- OUT_RING(ring,
- A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(gs->info.max_reg + 1) |
- A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(gs->info.max_half_reg + 1) |
- A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(gs->branchstack));
-
- fd6_emit_shader(ctx, ring, gs);
- fd6_emit_immediates(ctx->screen, gs, ring);
- if (ds)
- fd6_emit_link_map(ctx->screen, ds, gs, ring);
- else
- fd6_emit_link_map(ctx->screen, vs, gs, ring);
-
- OUT_PKT4(ring, REG_A6XX_VPC_GS_PACK, 1);
- OUT_RING(ring, A6XX_VPC_GS_PACK_POSITIONLOC(pos_loc) |
- A6XX_VPC_GS_PACK_PSIZELOC(psize_loc) |
- A6XX_VPC_GS_PACK_STRIDE_IN_VPC(l.max_loc));
-
- OUT_PKT4(ring, REG_A6XX_VPC_GS_LAYER_CNTL, 1);
- OUT_RING(ring, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00);
-
- OUT_PKT4(ring, REG_A6XX_GRAS_GS_LAYER_CNTL, 1);
- OUT_RING(ring, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER));
-
- uint32_t flags_regid = ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3);
-
- OUT_PKT4(ring, REG_A6XX_SP_GS_PRIMITIVE_CNTL, 1);
- OUT_RING(ring, A6XX_SP_GS_PRIMITIVE_CNTL_OUT(l.cnt) |
- A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid));
-
- OUT_PKT4(ring, REG_A6XX_PC_GS_OUT_CNTL, 1);
- OUT_RING(ring, A6XX_PC_GS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) |
- CONDREG(psize_regid, A6XX_PC_GS_OUT_CNTL_PSIZE) |
- CONDREG(layer_regid, A6XX_PC_GS_OUT_CNTL_LAYER) |
- CONDREG(primitive_regid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID) |
- A6XX_PC_GS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
-
- uint32_t output;
- switch (gs->shader->nir->info.gs.output_primitive) {
- case GL_POINTS:
- output = TESS_POINTS;
- break;
- case GL_LINE_STRIP:
- output = TESS_LINES;
- break;
- case GL_TRIANGLE_STRIP:
- output = TESS_CW_TRIS;
- break;
- default:
- unreachable("");
- }
- OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1);
- OUT_RING(ring,
- A6XX_PC_PRIMITIVE_CNTL_5_GS_VERTICES_OUT(gs->shader->nir->info.gs.vertices_out - 1) |
- A6XX_PC_PRIMITIVE_CNTL_5_GS_OUTPUT(output) |
- A6XX_PC_PRIMITIVE_CNTL_5_GS_INVOCATIONS(gs->shader->nir->info.gs.invocations - 1));
-
- OUT_PKT4(ring, REG_A6XX_GRAS_GS_CL_CNTL, 1);
- OUT_RING(ring, A6XX_GRAS_GS_CL_CNTL_CLIP_MASK(clip_mask) |
- A6XX_GRAS_GS_CL_CNTL_CULL_MASK(cull_mask));
-
- OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9100, 1);
- OUT_RING(ring, 0xff);
-
- OUT_PKT4(ring, REG_A6XX_VPC_GS_CLIP_CNTL, 1);
- OUT_RING(ring, A6XX_VPC_GS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
- A6XX_VPC_GS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
- A6XX_VPC_GS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));
-
- const struct ir3_shader_variant *prev = state->ds ? state->ds : state->vs;
-
- /* Size of per-primitive alloction in ldlw memory in vec4s. */
- uint32_t vec4_size =
- gs->shader->nir->info.gs.vertices_in *
- DIV_ROUND_UP(prev->output_size, 4);
- OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
- OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size));
-
- OUT_PKT4(ring, REG_A6XX_PC_MULTIVIEW_CNTL, 1);
- OUT_RING(ring, 0);
-
- OUT_PKT4(ring, REG_A6XX_SP_GS_PRIM_SIZE, 1);
- OUT_RING(ring, prev->output_size);
- } else {
- OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
- OUT_RING(ring, 0);
- OUT_PKT4(ring, REG_A6XX_SP_GS_PRIM_SIZE, 1);
- OUT_RING(ring, 0);
-
- OUT_PKT4(ring, REG_A6XX_GRAS_VS_LAYER_CNTL, 1);
- OUT_RING(ring, CONDREG(layer_regid, A6XX_GRAS_VS_LAYER_CNTL_WRITES_LAYER));
- }
-
- OUT_PKT4(ring, REG_A6XX_VPC_VS_CLIP_CNTL, 1);
- OUT_RING(ring, A6XX_VPC_VS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
- A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
- A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));
-
- OUT_PKT4(ring, REG_A6XX_GRAS_VS_CL_CNTL, 1);
- OUT_RING(ring, A6XX_GRAS_VS_CL_CNTL_CLIP_MASK(clip_mask) |
- A6XX_GRAS_VS_CL_CNTL_CULL_MASK(cull_mask));
-
- OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9107, 1);
- OUT_RING(ring, 0);
-
- if (fs->instrlen)
- fd6_emit_shader(ctx, ring, fs);
-
- OUT_REG(ring, A6XX_PC_PRIMID_PASSTHRU(primid_passthru));
-
- uint32_t non_sysval_input_count = 0;
- for (uint32_t i = 0; i < vs->inputs_count; i++)
- if (!vs->inputs[i].sysval)
- non_sysval_input_count++;
-
- OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_0, 1);
- OUT_RING(ring, A6XX_VFD_CONTROL_0_FETCH_CNT(non_sysval_input_count) |
- A6XX_VFD_CONTROL_0_DECODE_CNT(non_sysval_input_count));
-
- OUT_PKT4(ring, REG_A6XX_VFD_DEST_CNTL(0), non_sysval_input_count);
- for (uint32_t i = 0; i < non_sysval_input_count; i++) {
- assert(vs->inputs[i].compmask);
- OUT_RING(ring, A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vs->inputs[i].compmask) |
- A6XX_VFD_DEST_CNTL_INSTR_REGID(vs->inputs[i].regid));
- }
-
- OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_1, 6);
- OUT_RING(ring, A6XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
- A6XX_VFD_CONTROL_1_REGID4INST(instance_regid) |
- A6XX_VFD_CONTROL_1_REGID4PRIMID(primitive_regid) |
- 0xfc000000);
- OUT_RING(ring, A6XX_VFD_CONTROL_2_REGID_HSPATCHID(hs_patch_regid) |
- A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid));
- OUT_RING(ring, A6XX_VFD_CONTROL_3_REGID_DSPATCHID(ds_patch_regid) |
- A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) |
- A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) |
- 0xfc);
- OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */
- OUT_RING(ring, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gs_header_regid) |
- 0xfc00); /* VFD_CONTROL_5 */
- OUT_RING(ring,
- COND(primid_passthru, A6XX_VFD_CONTROL_6_PRIMID_PASSTHRU)); /* VFD_CONTROL_6 */
-
- if (!binning_pass)
- fd6_emit_immediates(ctx->screen, fs, ring);
+ uint32_t pos_regid, psize_regid, color_regid[8], posz_regid;
+ uint32_t clip0_regid, clip1_regid;
+ uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
+ uint32_t smask_in_regid, smask_regid;
+ uint32_t stencilref_regid;
+ uint32_t vertex_regid, instance_regid, layer_regid, primitive_regid;
+ uint32_t hs_invocation_regid;
+ uint32_t tess_coord_x_regid, tess_coord_y_regid, hs_patch_regid,
+ ds_patch_regid;
+ uint32_t ij_regid[IJ_COUNT];
+ uint32_t gs_header_regid;
+ enum a6xx_threadsize fssz;
+ uint8_t psize_loc = ~0, pos_loc = ~0, layer_loc = ~0;
+ uint8_t clip0_loc, clip1_loc;
+ int i, j;
+
+ static const struct ir3_shader_variant dummy_fs = {0};
+ const struct ir3_shader_variant *vs = binning_pass ? state->bs : state->vs;
+ const struct ir3_shader_variant *hs = state->hs;
+ const struct ir3_shader_variant *ds = state->ds;
+ const struct ir3_shader_variant *gs = state->gs;
+ const struct ir3_shader_variant *fs = binning_pass ? &dummy_fs : state->fs;
+
+ /* binning VS is wrong when GS is present, so use nonbinning VS
+ * TODO: compile both binning VS/GS variants correctly
+ */
+ if (binning_pass && state->gs)
+ vs = state->vs;
+
+ bool sample_shading = fs->per_samp | key->sample_shading;
+
+ fssz = fs->info.double_threadsize ? THREAD128 : THREAD64;
+
+ pos_regid = ir3_find_output_regid(vs, VARYING_SLOT_POS);
+ psize_regid = ir3_find_output_regid(vs, VARYING_SLOT_PSIZ);
+ clip0_regid = ir3_find_output_regid(vs, VARYING_SLOT_CLIP_DIST0);
+ clip1_regid = ir3_find_output_regid(vs, VARYING_SLOT_CLIP_DIST1);
+ layer_regid = ir3_find_output_regid(vs, VARYING_SLOT_LAYER);
+ vertex_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID);
+ instance_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID);
+
+ if (hs) {
+ tess_coord_x_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD);
+ tess_coord_y_regid = next_regid(tess_coord_x_regid, 1);
+ hs_patch_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID);
+ ds_patch_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID);
+ hs_invocation_regid =
+ ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3);
+
+ pos_regid = ir3_find_output_regid(ds, VARYING_SLOT_POS);
+ psize_regid = ir3_find_output_regid(ds, VARYING_SLOT_PSIZ);
+ clip0_regid = ir3_find_output_regid(ds, VARYING_SLOT_CLIP_DIST0);
+ clip1_regid = ir3_find_output_regid(ds, VARYING_SLOT_CLIP_DIST1);
+ } else {
+ tess_coord_x_regid = regid(63, 0);
+ tess_coord_y_regid = regid(63, 0);
+ hs_patch_regid = regid(63, 0);
+ ds_patch_regid = regid(63, 0);
+ hs_invocation_regid = regid(63, 0);
+ }
+
+ if (gs) {
+ gs_header_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3);
+ primitive_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID);
+ pos_regid = ir3_find_output_regid(gs, VARYING_SLOT_POS);
+ psize_regid = ir3_find_output_regid(gs, VARYING_SLOT_PSIZ);
+ clip0_regid = ir3_find_output_regid(gs, VARYING_SLOT_CLIP_DIST0);
+ clip1_regid = ir3_find_output_regid(gs, VARYING_SLOT_CLIP_DIST1);
+ layer_regid = ir3_find_output_regid(gs, VARYING_SLOT_LAYER);
+ } else {
+ gs_header_regid = regid(63, 0);
+ primitive_regid = regid(63, 0);
+ }
+
+ if (fs->color0_mrt) {
+ color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
+ color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
+ ir3_find_output_regid(fs, FRAG_RESULT_COLOR);
+ } else {
+ color_regid[0] = ir3_find_output_regid(fs, FRAG_RESULT_DATA0);
+ color_regid[1] = ir3_find_output_regid(fs, FRAG_RESULT_DATA1);
+ color_regid[2] = ir3_find_output_regid(fs, FRAG_RESULT_DATA2);
+ color_regid[3] = ir3_find_output_regid(fs, FRAG_RESULT_DATA3);
+ color_regid[4] = ir3_find_output_regid(fs, FRAG_RESULT_DATA4);
+ color_regid[5] = ir3_find_output_regid(fs, FRAG_RESULT_DATA5);
+ color_regid[6] = ir3_find_output_regid(fs, FRAG_RESULT_DATA6);
+ color_regid[7] = ir3_find_output_regid(fs, FRAG_RESULT_DATA7);
+ }
+
+ samp_id_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_ID);
+ smask_in_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_MASK_IN);
+ face_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE);
+ coord_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD);
+ zwcoord_regid = next_regid(coord_regid, 2);
+ posz_regid = ir3_find_output_regid(fs, FRAG_RESULT_DEPTH);
+ smask_regid = ir3_find_output_regid(fs, FRAG_RESULT_SAMPLE_MASK);
+ stencilref_regid = ir3_find_output_regid(fs, FRAG_RESULT_STENCIL);
+ for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
+ ij_regid[i] =
+ ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
+
+ /* If we have pre-dispatch texture fetches, then ij_pix should not
+ * be DCE'd, even if not actually used in the shader itself:
+ */
+ if (fs->num_sampler_prefetch > 0) {
+ assert(VALIDREG(ij_regid[IJ_PERSP_PIXEL]));
+ /* also, it seems like ij_pix is *required* to be r0.x */
+ assert(ij_regid[IJ_PERSP_PIXEL] == regid(0, 0));
+ }
+
+ /* we can't write gl_SampleMask for !msaa.. if b0 is zero then we
+ * end up masking the single sample!!
+ */
+ if (!key->msaa)
+ smask_regid = regid(63, 0);
+
+ /* we could probably divide this up into things that need to be
+ * emitted if frag-prog is dirty vs if vert-prog is dirty..
+ */
+
+ OUT_PKT4(ring, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
+ OUT_RING(ring, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) |
+ A6XX_SP_FS_PREFETCH_CNTL_UNK4(regid(63, 0)) |
+ 0x7000); // XXX
+ for (int i = 0; i < fs->num_sampler_prefetch; i++) {
+ const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
+ OUT_RING(ring,
+ A6XX_SP_FS_PREFETCH_CMD_SRC(prefetch->src) |
+ A6XX_SP_FS_PREFETCH_CMD_SAMP_ID(prefetch->samp_id) |
+ A6XX_SP_FS_PREFETCH_CMD_TEX_ID(prefetch->tex_id) |
+ A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) |
+ A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) |
+ COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) |
+ A6XX_SP_FS_PREFETCH_CMD_CMD(prefetch->cmd));
+ }
+
+ OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A9A8, 1);
+ OUT_RING(ring, 0);
+
+ OUT_PKT4(ring, REG_A6XX_SP_MODE_CONTROL, 1);
+ OUT_RING(ring, A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4);
+
+ bool fs_has_dual_src_color =
+ !binning_pass && fs->shader->nir->info.fs.color_is_dual_source;
+
+ OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL0, 1);
+ OUT_RING(ring,
+ A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) |
+ A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(smask_regid) |
+ A6XX_SP_FS_OUTPUT_CNTL0_STENCILREF_REGID(stencilref_regid) |
+ COND(fs_has_dual_src_color,
+ A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
+
+ OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1);
+ OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) |
+ A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(
+ vs->info.max_half_reg + 1) |
+ COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) |
+ A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack));
+
+ fd6_emit_shader(ctx, ring, vs);
+ fd6_emit_immediates(ctx->screen, vs, ring);
+
+ struct ir3_shader_linkage l = {0};
+ const struct ir3_shader_variant *last_shader = fd6_last_shader(state);
+
+ bool do_streamout = (last_shader->shader->stream_output.num_outputs > 0);
+ uint8_t clip_mask = last_shader->clip_mask,
+ cull_mask = last_shader->cull_mask;
+ uint8_t clip_cull_mask = clip_mask | cull_mask;
+
+ /* If we have streamout, link against the real FS, rather than the
+    * dummy FS used for binning pass state, to ensure the OUTLOCs
+ * match. Depending on whether we end up doing sysmem or gmem,
+ * the actual streamout could happen with either the binning pass
+ * or draw pass program, but the same streamout stateobj is used
+ * in either case:
+ */
+ ir3_link_shaders(&l, last_shader, do_streamout ? state->fs : fs, true);
+
+ bool primid_passthru = l.primid_loc != 0xff;
+ clip0_loc = l.clip0_loc;
+ clip1_loc = l.clip1_loc;
+
+ OUT_PKT4(ring, REG_A6XX_VPC_VAR_DISABLE(0), 4);
+ OUT_RING(ring, ~l.varmask[0]); /* VPC_VAR[0].DISABLE */
+ OUT_RING(ring, ~l.varmask[1]); /* VPC_VAR[1].DISABLE */
+ OUT_RING(ring, ~l.varmask[2]); /* VPC_VAR[2].DISABLE */
+ OUT_RING(ring, ~l.varmask[3]); /* VPC_VAR[3].DISABLE */
+
+ /* Add stream out outputs after computing the VPC_VAR_DISABLE bitmask. */
+ ir3_link_stream_out(&l, last_shader);
+
+ if (VALIDREG(layer_regid)) {
+ layer_loc = l.max_loc;
+ ir3_link_add(&l, layer_regid, 0x1, l.max_loc);
+ }
+
+ if (VALIDREG(pos_regid)) {
+ pos_loc = l.max_loc;
+ ir3_link_add(&l, pos_regid, 0xf, l.max_loc);
+ }
+
+ if (VALIDREG(psize_regid)) {
+ psize_loc = l.max_loc;
+ ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
+ }
+
+ /* Handle the case where clip/cull distances aren't read by the FS. Make
+ * sure to avoid adding an output with an empty writemask if the user
+ * disables all the clip distances in the API so that the slot is unused.
+ */
+ if (clip0_loc == 0xff && VALIDREG(clip0_regid) &&
+ (clip_cull_mask & 0xf) != 0) {
+ clip0_loc = l.max_loc;
+ ir3_link_add(&l, clip0_regid, clip_cull_mask & 0xf, l.max_loc);
+ }
+
+ if (clip1_loc == 0xff && VALIDREG(clip1_regid) &&
+ (clip_cull_mask >> 4) != 0) {
+ clip1_loc = l.max_loc;
+ ir3_link_add(&l, clip1_regid, clip_cull_mask >> 4, l.max_loc);
+ }
+
+ /* If we have stream-out, we use the full shader for binning
+ * pass, rather than the optimized binning pass one, so that we
+ * have all the varying outputs available for xfb. So streamout
+ * state should always be derived from the non-binning pass
+ * program:
+ */
+ if (do_streamout && !binning_pass) {
+ setup_stream_out(state, last_shader, &l);
+ }
+
+ debug_assert(l.cnt <= 32);
+ if (gs)
+ OUT_PKT4(ring, REG_A6XX_SP_GS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2));
+ else if (ds)
+ OUT_PKT4(ring, REG_A6XX_SP_DS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2));
+ else
+ OUT_PKT4(ring, REG_A6XX_SP_VS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2));
+
+ for (j = 0; j < l.cnt;) {
+ uint32_t reg = 0;
+
+ reg |= A6XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
+ reg |= A6XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
+ j++;
+
+ reg |= A6XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
+ reg |= A6XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
+ j++;
+
+ OUT_RING(ring, reg);
+ }
+
+ if (gs)
+ OUT_PKT4(ring, REG_A6XX_SP_GS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4));
+ else if (ds)
+ OUT_PKT4(ring, REG_A6XX_SP_DS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4));
+ else
+ OUT_PKT4(ring, REG_A6XX_SP_VS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4));
+
+ for (j = 0; j < l.cnt;) {
+ uint32_t reg = 0;
+
+ reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
+ reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
+ reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
+ reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);
+
+ OUT_RING(ring, reg);
+ }
+
+ if (hs) {
+ assert(vs->mergedregs == hs->mergedregs);
+ OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1);
+ OUT_RING(
+ ring,
+ A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) |
+ A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(hs->info.max_half_reg + 1) |
+ A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack));
+
+ fd6_emit_shader(ctx, ring, hs);
+ fd6_emit_immediates(ctx->screen, hs, ring);
+ fd6_emit_link_map(ctx->screen, vs, hs, ring);
+
+ OUT_PKT4(ring, REG_A6XX_SP_DS_CTRL_REG0, 1);
+ OUT_RING(
+ ring,
+ A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) |
+ A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(ds->info.max_half_reg + 1) |
+ COND(ds->mergedregs, A6XX_SP_DS_CTRL_REG0_MERGEDREGS) |
+ A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack));
+
+ fd6_emit_shader(ctx, ring, ds);
+ fd6_emit_immediates(ctx->screen, ds, ring);
+ fd6_emit_link_map(ctx->screen, hs, ds, ring);
+
+ shader_info *hs_info = &hs->shader->nir->info;
+ OUT_PKT4(ring, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
+ OUT_RING(ring, hs_info->tess.tcs_vertices_out);
+
+ /* Total attribute slots in HS incoming patch. */
+ OUT_PKT4(ring, REG_A6XX_PC_HS_INPUT_SIZE, 1);
+ OUT_RING(ring, hs_info->tess.tcs_vertices_out * vs->output_size / 4);
+
+ const uint32_t wavesize = 64;
+ const uint32_t max_wave_input_size = 64;
+ const uint32_t patch_control_points = hs_info->tess.tcs_vertices_out;
+
+      /* note: if HS is really just the VS extended, then the divisor here
+       * should be MAX2(patch_control_points, hs_info->tess.tcs_vertices_out);
+       * however that doesn't match the blob, and fails some dEQP tests.
+ */
+ uint32_t prims_per_wave = wavesize / hs_info->tess.tcs_vertices_out;
+ uint32_t max_prims_per_wave = max_wave_input_size * wavesize /
+ (vs->output_size * patch_control_points);
+ prims_per_wave = MIN2(prims_per_wave, max_prims_per_wave);
+
+ uint32_t total_size =
+ vs->output_size * patch_control_points * prims_per_wave;
+ uint32_t wave_input_size = DIV_ROUND_UP(total_size, wavesize);
+
+ OUT_PKT4(ring, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1);
+ OUT_RING(ring, wave_input_size);
+
+ shader_info *ds_info = &ds->shader->nir->info;
+ OUT_PKT4(ring, REG_A6XX_PC_TESS_CNTL, 1);
+ uint32_t output;
+ if (ds_info->tess.point_mode)
+ output = TESS_POINTS;
+ else if (ds_info->tess.primitive_mode == GL_ISOLINES)
+ output = TESS_LINES;
+ else if (ds_info->tess.ccw)
+ output = TESS_CCW_TRIS;
+ else
+ output = TESS_CW_TRIS;
+
+ OUT_RING(ring, A6XX_PC_TESS_CNTL_SPACING(
+ fd6_gl2spacing(ds_info->tess.spacing)) |
+ A6XX_PC_TESS_CNTL_OUTPUT(output));
+
+ OUT_PKT4(ring, REG_A6XX_VPC_DS_CLIP_CNTL, 1);
+ OUT_RING(ring, A6XX_VPC_DS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
+ A6XX_VPC_DS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
+ A6XX_VPC_DS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));
+
+ OUT_PKT4(ring, REG_A6XX_VPC_DS_LAYER_CNTL, 1);
+ OUT_RING(ring, 0x0000ffff);
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_DS_LAYER_CNTL, 1);
+ OUT_RING(ring, 0x0);
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_DS_CL_CNTL, 1);
+ OUT_RING(ring, A6XX_GRAS_DS_CL_CNTL_CLIP_MASK(clip_mask) |
+ A6XX_GRAS_DS_CL_CNTL_CULL_MASK(cull_mask));
+
+ OUT_PKT4(ring, REG_A6XX_VPC_VS_PACK, 1);
+ OUT_RING(ring, A6XX_VPC_VS_PACK_POSITIONLOC(pos_loc) |
+ A6XX_VPC_VS_PACK_PSIZELOC(255) |
+ A6XX_VPC_VS_PACK_STRIDE_IN_VPC(l.max_loc));
+
+ OUT_PKT4(ring, REG_A6XX_VPC_DS_PACK, 1);
+ OUT_RING(ring, A6XX_VPC_DS_PACK_POSITIONLOC(pos_loc) |
+ A6XX_VPC_DS_PACK_PSIZELOC(psize_loc) |
+ A6XX_VPC_DS_PACK_STRIDE_IN_VPC(l.max_loc));
+
+ OUT_PKT4(ring, REG_A6XX_SP_DS_PRIMITIVE_CNTL, 1);
+ OUT_RING(ring, A6XX_SP_DS_PRIMITIVE_CNTL_OUT(l.cnt));
+
+ OUT_PKT4(ring, REG_A6XX_PC_DS_OUT_CNTL, 1);
+ OUT_RING(ring, A6XX_PC_DS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) |
+ CONDREG(psize_regid, A6XX_PC_DS_OUT_CNTL_PSIZE) |
+ A6XX_PC_DS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
+
+ } else {
+ OUT_PKT4(ring, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1);
+ OUT_RING(ring, 0);
+ }
+
+ OUT_PKT4(ring, REG_A6XX_SP_VS_PRIMITIVE_CNTL, 1);
+ OUT_RING(ring, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(l.cnt));
+
+ bool enable_varyings = fs->total_in > 0;
+
+ OUT_PKT4(ring, REG_A6XX_VPC_CNTL_0, 1);
+ OUT_RING(ring, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs->total_in) |
+ COND(enable_varyings, A6XX_VPC_CNTL_0_VARYING) |
+ A6XX_VPC_CNTL_0_PRIMIDLOC(l.primid_loc) |
+ A6XX_VPC_CNTL_0_VIEWIDLOC(0xff));
+
+ OUT_PKT4(ring, REG_A6XX_PC_VS_OUT_CNTL, 1);
+ OUT_RING(ring, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) |
+ CONDREG(psize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
+ CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
+ A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
+
+ OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1);
+ OUT_RING(ring, 0);
+
+ OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5);
+ OUT_RING(ring, 0x7); /* XXX */
+ OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
+ A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) |
+ A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(smask_in_regid) |
+ A6XX_HLSQ_CONTROL_2_REG_SIZE(ij_regid[IJ_PERSP_SIZE]));
+ OUT_RING(
+ ring,
+ A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) |
+ A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) |
+ A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(
+ ij_regid[IJ_PERSP_CENTROID]) |
+ A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_CENTROID(
+ ij_regid[IJ_LINEAR_CENTROID]));
+ OUT_RING(
+ ring,
+ A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
+ A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
+ A6XX_HLSQ_CONTROL_4_REG_IJ_PERSP_SAMPLE(ij_regid[IJ_PERSP_SAMPLE]) |
+ A6XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE]));
+ OUT_RING(ring, 0xfc); /* XXX */
+
+ OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL_0, 1);
+ OUT_RING(ring, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(fssz) |
+ COND(enable_varyings, A6XX_HLSQ_FS_CNTL_0_VARYINGS));
+
+ OUT_PKT4(ring, REG_A6XX_SP_FS_CTRL_REG0, 1);
+ OUT_RING(
+ ring,
+ A6XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
+ COND(enable_varyings, A6XX_SP_FS_CTRL_REG0_VARYING) | 0x1000000 |
+ A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) |
+ A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fs->info.max_half_reg + 1) |
+ COND(fs->mergedregs, A6XX_SP_FS_CTRL_REG0_MERGEDREGS) |
+ A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(fs->branchstack) |
+ COND(fs->need_pixlod, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+
+ OUT_PKT4(ring, REG_A6XX_VPC_VS_LAYER_CNTL, 1);
+ OUT_RING(ring, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) |
+ A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(0xff));
+
+ bool need_size = fs->frag_face || fs->fragcoord_compmask != 0;
+ bool need_size_persamp = false;
+ if (VALIDREG(ij_regid[IJ_PERSP_SIZE])) {
+ if (sample_shading)
+ need_size_persamp = true;
+ else
+ need_size = true;
+ }
+ if (VALIDREG(ij_regid[IJ_LINEAR_PIXEL]))
+ need_size = true;
+
+ /* XXX: enable bits for linear centroid and linear sample bary */
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_CNTL, 1);
+ OUT_RING(
+ ring,
+ CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_GRAS_CNTL_IJ_PERSP_PIXEL) |
+ CONDREG(ij_regid[IJ_PERSP_CENTROID],
+ A6XX_GRAS_CNTL_IJ_PERSP_CENTROID) |
+ CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_GRAS_CNTL_IJ_PERSP_SAMPLE) |
+ COND(need_size, A6XX_GRAS_CNTL_SIZE) |
+ COND(need_size_persamp, A6XX_GRAS_CNTL_SIZE_PERSAMP) |
+ COND(fs->fragcoord_compmask != 0,
+ A6XX_GRAS_CNTL_COORD_MASK(fs->fragcoord_compmask)));
+
+ OUT_PKT4(ring, REG_A6XX_RB_RENDER_CONTROL0, 2);
+ OUT_RING(
+ ring,
+ CONDREG(ij_regid[IJ_PERSP_PIXEL],
+ A6XX_RB_RENDER_CONTROL0_IJ_PERSP_PIXEL) |
+ CONDREG(ij_regid[IJ_PERSP_CENTROID],
+ A6XX_RB_RENDER_CONTROL0_IJ_PERSP_CENTROID) |
+ CONDREG(ij_regid[IJ_PERSP_SAMPLE],
+ A6XX_RB_RENDER_CONTROL0_IJ_PERSP_SAMPLE) |
+ COND(need_size, A6XX_RB_RENDER_CONTROL0_SIZE) |
+ COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_UNK10) |
+ COND(need_size_persamp, A6XX_RB_RENDER_CONTROL0_SIZE_PERSAMP) |
+ COND(fs->fragcoord_compmask != 0,
+ A6XX_RB_RENDER_CONTROL0_COORD_MASK(fs->fragcoord_compmask)));
+
+ OUT_RING(ring,
+ CONDREG(smask_in_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) |
+ CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) |
+ CONDREG(ij_regid[IJ_PERSP_SIZE], A6XX_RB_RENDER_CONTROL1_SIZE) |
+ COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS));
+
+ OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_CNTL, 1);
+ OUT_RING(ring, COND(sample_shading, A6XX_RB_SAMPLE_CNTL_PER_SAMP_MODE));
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_8101, 1);
+ OUT_RING(ring, COND(sample_shading, 0x6)); // XXX
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_SAMPLE_CNTL, 1);
+ OUT_RING(ring, COND(sample_shading, A6XX_GRAS_SAMPLE_CNTL_PER_SAMP_MODE));
+
+ OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_REG(0), 8);
+ for (i = 0; i < 8; i++) {
+ OUT_RING(ring, A6XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
+ COND(color_regid[i] & HALF_REG_ID,
+ A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
+ if (VALIDREG(color_regid[i])) {
+ state->mrt_components |= 0xf << (i * 4);
+ }
+ }
+
+ /* dual source blending has an extra fs output in the 2nd slot */
+ if (fs_has_dual_src_color) {
+ state->mrt_components |= 0xf << 4;
+ }
+
+ OUT_PKT4(ring, REG_A6XX_VPC_VS_PACK, 1);
+ OUT_RING(ring, A6XX_VPC_VS_PACK_POSITIONLOC(pos_loc) |
+ A6XX_VPC_VS_PACK_PSIZELOC(psize_loc) |
+ A6XX_VPC_VS_PACK_STRIDE_IN_VPC(l.max_loc));
+
+ if (gs) {
+ assert(gs->mergedregs == (ds ? ds->mergedregs : vs->mergedregs));
+ OUT_PKT4(ring, REG_A6XX_SP_GS_CTRL_REG0, 1);
+ OUT_RING(
+ ring,
+ A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(gs->info.max_reg + 1) |
+ A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(gs->info.max_half_reg + 1) |
+ A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(gs->branchstack));
+
+ fd6_emit_shader(ctx, ring, gs);
+ fd6_emit_immediates(ctx->screen, gs, ring);
+ if (ds)
+ fd6_emit_link_map(ctx->screen, ds, gs, ring);
+ else
+ fd6_emit_link_map(ctx->screen, vs, gs, ring);
+
+ OUT_PKT4(ring, REG_A6XX_VPC_GS_PACK, 1);
+ OUT_RING(ring, A6XX_VPC_GS_PACK_POSITIONLOC(pos_loc) |
+ A6XX_VPC_GS_PACK_PSIZELOC(psize_loc) |
+ A6XX_VPC_GS_PACK_STRIDE_IN_VPC(l.max_loc));
+
+ OUT_PKT4(ring, REG_A6XX_VPC_GS_LAYER_CNTL, 1);
+ OUT_RING(ring, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00);
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_GS_LAYER_CNTL, 1);
+ OUT_RING(ring,
+ CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER));
+
+ uint32_t flags_regid =
+ ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3);
+
+ OUT_PKT4(ring, REG_A6XX_SP_GS_PRIMITIVE_CNTL, 1);
+ OUT_RING(ring, A6XX_SP_GS_PRIMITIVE_CNTL_OUT(l.cnt) |
+ A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid));
+
+ OUT_PKT4(ring, REG_A6XX_PC_GS_OUT_CNTL, 1);
+ OUT_RING(ring,
+ A6XX_PC_GS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) |
+ CONDREG(psize_regid, A6XX_PC_GS_OUT_CNTL_PSIZE) |
+ CONDREG(layer_regid, A6XX_PC_GS_OUT_CNTL_LAYER) |
+ CONDREG(primitive_regid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID) |
+ A6XX_PC_GS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
+
+ uint32_t output;
+ switch (gs->shader->nir->info.gs.output_primitive) {
+ case GL_POINTS:
+ output = TESS_POINTS;
+ break;
+ case GL_LINE_STRIP:
+ output = TESS_LINES;
+ break;
+ case GL_TRIANGLE_STRIP:
+ output = TESS_CW_TRIS;
+ break;
+ default:
+ unreachable("");
+ }
+ OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1);
+ OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_5_GS_VERTICES_OUT(
+ gs->shader->nir->info.gs.vertices_out - 1) |
+ A6XX_PC_PRIMITIVE_CNTL_5_GS_OUTPUT(output) |
+ A6XX_PC_PRIMITIVE_CNTL_5_GS_INVOCATIONS(
+ gs->shader->nir->info.gs.invocations - 1));
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_GS_CL_CNTL, 1);
+ OUT_RING(ring, A6XX_GRAS_GS_CL_CNTL_CLIP_MASK(clip_mask) |
+ A6XX_GRAS_GS_CL_CNTL_CULL_MASK(cull_mask));
+
+ OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9100, 1);
+ OUT_RING(ring, 0xff);
+
+ OUT_PKT4(ring, REG_A6XX_VPC_GS_CLIP_CNTL, 1);
+ OUT_RING(ring, A6XX_VPC_GS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
+ A6XX_VPC_GS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
+ A6XX_VPC_GS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));
+
+ const struct ir3_shader_variant *prev = state->ds ? state->ds : state->vs;
+
+      /* Size of per-primitive allocation in ldlw memory, in vec4s. */
+ uint32_t vec4_size = gs->shader->nir->info.gs.vertices_in *
+ DIV_ROUND_UP(prev->output_size, 4);
+ OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
+ OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size));
+
+ OUT_PKT4(ring, REG_A6XX_PC_MULTIVIEW_CNTL, 1);
+ OUT_RING(ring, 0);
+
+ OUT_PKT4(ring, REG_A6XX_SP_GS_PRIM_SIZE, 1);
+ OUT_RING(ring, prev->output_size);
+ } else {
+ OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
+ OUT_RING(ring, 0);
+ OUT_PKT4(ring, REG_A6XX_SP_GS_PRIM_SIZE, 1);
+ OUT_RING(ring, 0);
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_VS_LAYER_CNTL, 1);
+ OUT_RING(ring,
+ CONDREG(layer_regid, A6XX_GRAS_VS_LAYER_CNTL_WRITES_LAYER));
+ }
+
+ OUT_PKT4(ring, REG_A6XX_VPC_VS_CLIP_CNTL, 1);
+ OUT_RING(ring, A6XX_VPC_VS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
+ A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
+ A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_VS_CL_CNTL, 1);
+ OUT_RING(ring, A6XX_GRAS_VS_CL_CNTL_CLIP_MASK(clip_mask) |
+ A6XX_GRAS_VS_CL_CNTL_CULL_MASK(cull_mask));
+
+ OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9107, 1);
+ OUT_RING(ring, 0);
+
+ if (fs->instrlen)
+ fd6_emit_shader(ctx, ring, fs);
+
+ OUT_REG(ring, A6XX_PC_PRIMID_PASSTHRU(primid_passthru));
+
+ uint32_t non_sysval_input_count = 0;
+ for (uint32_t i = 0; i < vs->inputs_count; i++)
+ if (!vs->inputs[i].sysval)
+ non_sysval_input_count++;
+
+ OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_0, 1);
+ OUT_RING(ring, A6XX_VFD_CONTROL_0_FETCH_CNT(non_sysval_input_count) |
+ A6XX_VFD_CONTROL_0_DECODE_CNT(non_sysval_input_count));
+
+ OUT_PKT4(ring, REG_A6XX_VFD_DEST_CNTL(0), non_sysval_input_count);
+ for (uint32_t i = 0; i < non_sysval_input_count; i++) {
+ assert(vs->inputs[i].compmask);
+ OUT_RING(ring,
+ A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vs->inputs[i].compmask) |
+ A6XX_VFD_DEST_CNTL_INSTR_REGID(vs->inputs[i].regid));
+ }
+
+ OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_1, 6);
+ OUT_RING(ring, A6XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
+ A6XX_VFD_CONTROL_1_REGID4INST(instance_regid) |
+ A6XX_VFD_CONTROL_1_REGID4PRIMID(primitive_regid) |
+ 0xfc000000);
+ OUT_RING(ring,
+ A6XX_VFD_CONTROL_2_REGID_HSPATCHID(hs_patch_regid) |
+ A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid));
+ OUT_RING(ring, A6XX_VFD_CONTROL_3_REGID_DSPATCHID(ds_patch_regid) |
+ A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) |
+ A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) | 0xfc);
+ OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */
+ OUT_RING(ring, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gs_header_regid) |
+ 0xfc00); /* VFD_CONTROL_5 */
+ OUT_RING(ring, COND(primid_passthru,
+ A6XX_VFD_CONTROL_6_PRIMID_PASSTHRU)); /* VFD_CONTROL_6 */
+
+ if (!binning_pass)
+ fd6_emit_immediates(ctx->screen, fs, ring);
}
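
To make the SP_HS_WAVE_INPUT_SIZE arithmetic above concrete, here is one
worked instance with illustrative, made-up inputs (tcs_vertices_out = 3,
vs->output_size = 8); the constants match the function (wavesize = 64,
max_wave_input_size = 64):

    uint32_t prims_per_wave     = 64 / 3;            /* = 21 patches per wave */
    uint32_t max_prims_per_wave = 64 * 64 / (8 * 3); /* = 170 */
    prims_per_wave = MIN2(prims_per_wave, max_prims_per_wave); /* = 21 */
    uint32_t total_size      = 8 * 3 * 21;            /* = 504 */
    uint32_t wave_input_size = DIV_ROUND_UP(504, 64); /* = 8 */
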
-static void emit_interp_state(struct fd_ringbuffer *ring, struct ir3_shader_variant *fs,
- bool rasterflat, bool sprite_coord_mode, uint32_t sprite_coord_enable);
+static void emit_interp_state(struct fd_ringbuffer *ring,
+ struct ir3_shader_variant *fs, bool rasterflat,
+ bool sprite_coord_mode,
+ uint32_t sprite_coord_enable);
static struct fd_ringbuffer *
create_interp_stateobj(struct fd_context *ctx, struct fd6_program_state *state)
{
- struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 18 * 4);
+ struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 18 * 4);
- emit_interp_state(ring, state->fs, false, false, 0);
+ emit_interp_state(ring, state->fs, false, false, 0);
- return ring;
+ return ring;
}
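
A note on the 18 * 4 sizing above: emit_interp_state() writes two PKT4
packets of one header dword plus eight payload dwords each (assuming
single-dword PKT4 headers, per the a6xx packet format), and ringbuffer
object sizes are given in bytes:

    /* 2 packets * (1 header + 8 payload dwords) * 4 bytes = 72 bytes */
    unsigned interp_size = 2 * (1 + 8) * 4;
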
/* build the program streaming state which is not part of the pre-
struct fd_ringbuffer *
fd6_program_interp_state(struct fd6_emit *emit)
{
- const struct fd6_program_state *state = fd6_emit_get_prog(emit);
+ const struct fd6_program_state *state = fd6_emit_get_prog(emit);
- if (!unlikely(emit->rasterflat || emit->sprite_coord_enable)) {
- /* fastpath: */
- return fd_ringbuffer_ref(state->interp_stateobj);
- } else {
- struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
- emit->ctx->batch->submit, 18 * 4, FD_RINGBUFFER_STREAMING);
+ if (!unlikely(emit->rasterflat || emit->sprite_coord_enable)) {
+ /* fastpath: */
+ return fd_ringbuffer_ref(state->interp_stateobj);
+ } else {
+ struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
+ emit->ctx->batch->submit, 18 * 4, FD_RINGBUFFER_STREAMING);
- emit_interp_state(ring, state->fs, emit->rasterflat,
- emit->sprite_coord_mode, emit->sprite_coord_enable);
+ emit_interp_state(ring, state->fs, emit->rasterflat,
+ emit->sprite_coord_mode, emit->sprite_coord_enable);
- return ring;
- }
+ return ring;
+ }
}
static void
emit_interp_state(struct fd_ringbuffer *ring, struct ir3_shader_variant *fs,
- bool rasterflat, bool sprite_coord_mode, uint32_t sprite_coord_enable)
+ bool rasterflat, bool sprite_coord_mode,
+ uint32_t sprite_coord_enable)
{
- uint32_t vinterp[8], vpsrepl[8];
-
- memset(vinterp, 0, sizeof(vinterp));
- memset(vpsrepl, 0, sizeof(vpsrepl));
-
- for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
-
- /* NOTE: varyings are packed, so if compmask is 0xb
- * then first, third, and fourth component occupy
- * three consecutive varying slots:
- */
- unsigned compmask = fs->inputs[j].compmask;
-
- uint32_t inloc = fs->inputs[j].inloc;
-
- if (fs->inputs[j].flat ||
- (fs->inputs[j].rasterflat && rasterflat)) {
- uint32_t loc = inloc;
-
- for (int i = 0; i < 4; i++) {
- if (compmask & (1 << i)) {
- vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
- loc++;
- }
- }
- }
-
- bool coord_mode = sprite_coord_mode;
- if (ir3_point_sprite(fs, j, sprite_coord_enable, &coord_mode)) {
- /* mask is two 2-bit fields, where:
- * '01' -> S
- * '10' -> T
- * '11' -> 1 - T (flip mode)
- */
- unsigned mask = coord_mode ? 0b1101 : 0b1001;
- uint32_t loc = inloc;
- if (compmask & 0x1) {
- vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x2) {
- vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x4) {
- /* .z <- 0.0f */
- vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
- loc++;
- }
- if (compmask & 0x8) {
- /* .w <- 1.0f */
- vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
- loc++;
- }
- }
- }
-
- OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
- for (int i = 0; i < 8; i++)
- OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
-
- OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
- for (int i = 0; i < 8; i++)
- OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
+ uint32_t vinterp[8], vpsrepl[8];
+
+ memset(vinterp, 0, sizeof(vinterp));
+ memset(vpsrepl, 0, sizeof(vpsrepl));
+
+ for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count;) {
+
+      /* NOTE: varyings are packed, so if compmask is 0xb
+       * then first, second, and fourth component occupy
+       * three consecutive varying slots:
+       */
+ unsigned compmask = fs->inputs[j].compmask;
+
+ uint32_t inloc = fs->inputs[j].inloc;
+
+ if (fs->inputs[j].flat || (fs->inputs[j].rasterflat && rasterflat)) {
+ uint32_t loc = inloc;
+
+ for (int i = 0; i < 4; i++) {
+ if (compmask & (1 << i)) {
+ vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
+ loc++;
+ }
+ }
+ }
+
+ bool coord_mode = sprite_coord_mode;
+ if (ir3_point_sprite(fs, j, sprite_coord_enable, &coord_mode)) {
+ /* mask is two 2-bit fields, where:
+ * '01' -> S
+ * '10' -> T
+ * '11' -> 1 - T (flip mode)
+ */
+ unsigned mask = coord_mode ? 0b1101 : 0b1001;
+ uint32_t loc = inloc;
+ if (compmask & 0x1) {
+ vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x2) {
+ vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x4) {
+ /* .z <- 0.0f */
+ vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
+ loc++;
+ }
+ if (compmask & 0x8) {
+ /* .w <- 1.0f */
+ vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
+ loc++;
+ }
+ }
+ }
+
+ OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
+ for (int i = 0; i < 8; i++)
+ OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
+
+ OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+ for (int i = 0; i < 8; i++)
+ OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
}
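
The 2-bit packing that emit_interp_state() performs above is easier to see in isolation. A minimal sketch in plain C (the helper name is hypothetical, not part of this patch): each varying component location owns a 2-bit mode field, sixteen fields per 32-bit word, across the eight vinterp/vpsrepl words:

   /* Hypothetical helper illustrating the packing used above:
    * 2 bits per component location, 16 locations per 32-bit word.
    */
   static inline void
   set_field(uint32_t words[8], uint32_t loc, uint32_t mode)
   {
      words[loc / 16] |= (mode & 0x3) << ((loc % 16) * 2);
   }
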
static struct ir3_program_state *
fd6_program_create(void *data, struct ir3_shader_variant *bs,
- struct ir3_shader_variant *vs,
- struct ir3_shader_variant *hs,
- struct ir3_shader_variant *ds,
- struct ir3_shader_variant *gs,
- struct ir3_shader_variant *fs,
- const struct ir3_shader_key *key)
- in_dt
+ struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
+ struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
+ struct ir3_shader_variant *fs,
+ const struct ir3_shader_key *key) in_dt
{
- struct fd_context *ctx = fd_context(data);
- struct fd6_program_state *state = CALLOC_STRUCT(fd6_program_state);
-
- tc_assert_driver_thread(ctx->tc);
-
- /* if we have streamout, use full VS in binning pass, as the
- * binning pass VS will have outputs on other than position/psize
- * stripped out:
- */
- state->bs = vs->shader->stream_output.num_outputs ? vs : bs;
- state->vs = vs;
- state->hs = hs;
- state->ds = ds;
- state->gs = gs;
- state->fs = fs;
- state->config_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
- state->binning_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
- state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
- state->streamout_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
-
+ struct fd_context *ctx = fd_context(data);
+ struct fd6_program_state *state = CALLOC_STRUCT(fd6_program_state);
+
+ tc_assert_driver_thread(ctx->tc);
+
+ /* if we have streamout, use full VS in binning pass, as the
+ * binning pass VS will have outputs on other than position/psize
+ * stripped out:
+ */
+ state->bs = vs->shader->stream_output.num_outputs ? vs : bs;
+ state->vs = vs;
+ state->hs = hs;
+ state->ds = ds;
+ state->gs = gs;
+ state->fs = fs;
+ state->config_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
+ state->binning_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
+ state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
+ state->streamout_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
#ifdef DEBUG
- if (!ds) {
- for (unsigned i = 0; i < bs->inputs_count; i++) {
- if (vs->inputs[i].sysval)
- continue;
- debug_assert(bs->inputs[i].regid == vs->inputs[i].regid);
- }
- }
+ if (!ds) {
+ for (unsigned i = 0; i < bs->inputs_count; i++) {
+ if (vs->inputs[i].sysval)
+ continue;
+ debug_assert(bs->inputs[i].regid == vs->inputs[i].regid);
+ }
+ }
#endif
- setup_config_stateobj(state->config_stateobj, state);
- setup_stateobj(state->binning_stateobj, ctx, state, key, true);
- setup_stateobj(state->stateobj, ctx, state, key, false);
- state->interp_stateobj = create_interp_stateobj(ctx, state);
+ setup_config_stateobj(state->config_stateobj, state);
+ setup_stateobj(state->binning_stateobj, ctx, state, key, true);
+ setup_stateobj(state->stateobj, ctx, state, key, false);
+ state->interp_stateobj = create_interp_stateobj(ctx, state);
- struct ir3_stream_output_info *stream_output =
- &fd6_last_shader(state)->shader->stream_output;
- if (stream_output->num_outputs > 0)
- state->stream_output = stream_output;
+ struct ir3_stream_output_info *stream_output =
+ &fd6_last_shader(state)->shader->stream_output;
+ if (stream_output->num_outputs > 0)
+ state->stream_output = stream_output;
- return &state->base;
+ return &state->base;
}
static void
fd6_program_destroy(void *data, struct ir3_program_state *state)
{
- struct fd6_program_state *so = fd6_program_state(state);
- fd_ringbuffer_del(so->stateobj);
- fd_ringbuffer_del(so->binning_stateobj);
- fd_ringbuffer_del(so->config_stateobj);
- fd_ringbuffer_del(so->interp_stateobj);
- fd_ringbuffer_del(so->streamout_stateobj);
- free(so);
+ struct fd6_program_state *so = fd6_program_state(state);
+ fd_ringbuffer_del(so->stateobj);
+ fd_ringbuffer_del(so->binning_stateobj);
+ fd_ringbuffer_del(so->config_stateobj);
+ fd_ringbuffer_del(so->interp_stateobj);
+ fd_ringbuffer_del(so->streamout_stateobj);
+ free(so);
}
static const struct ir3_cache_funcs cache_funcs = {
- .create_state = fd6_program_create,
- .destroy_state = fd6_program_destroy,
+ .create_state = fd6_program_create,
+ .destroy_state = fd6_program_destroy,
};
void
fd6_prog_init(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
+ ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
- ir3_prog_init(pctx);
+ ir3_prog_init(pctx);
- fd_prog_init(pctx);
+ fd_prog_init(pctx);
}
struct fd6_emit;
struct fd6_program_state {
- struct ir3_program_state base;
- struct ir3_shader_variant *bs; /* binning pass vs */
- struct ir3_shader_variant *vs;
- struct ir3_shader_variant *hs;
- struct ir3_shader_variant *ds;
- struct ir3_shader_variant *gs;
- struct ir3_shader_variant *fs;
- struct fd_ringbuffer *config_stateobj;
- struct fd_ringbuffer *interp_stateobj;
- struct fd_ringbuffer *binning_stateobj;
- struct fd_ringbuffer *streamout_stateobj;
- struct fd_ringbuffer *stateobj;
+ struct ir3_program_state base;
+ struct ir3_shader_variant *bs; /* binning pass vs */
+ struct ir3_shader_variant *vs;
+ struct ir3_shader_variant *hs;
+ struct ir3_shader_variant *ds;
+ struct ir3_shader_variant *gs;
+ struct ir3_shader_variant *fs;
+ struct fd_ringbuffer *config_stateobj;
+ struct fd_ringbuffer *interp_stateobj;
+ struct fd_ringbuffer *binning_stateobj;
+ struct fd_ringbuffer *streamout_stateobj;
+ struct fd_ringbuffer *stateobj;
- struct ir3_stream_output_info *stream_output;
+ struct ir3_stream_output_info *stream_output;
- /**
- * Output components from frag shader. It is possible to have
- * a fragment shader that only writes a subset of the bound
- * render targets.
- */
- uint32_t mrt_components;
+ /**
+ * Output components from frag shader. It is possible to have
+ * a fragment shader that only writes a subset of the bound
+ * render targets.
+ */
+ uint32_t mrt_components;
};
static inline struct fd6_program_state *
fd6_program_state(struct ir3_program_state *state)
{
- return (struct fd6_program_state *)state;
+ return (struct fd6_program_state *)state;
}
static inline const struct ir3_shader_variant *
fd6_last_shader(const struct fd6_program_state *state)
{
- if (state->gs)
- return state->gs;
- else if (state->ds)
- return state->ds;
- else
- return state->vs;
+ if (state->gs)
+ return state->gs;
+ else if (state->ds)
+ return state->ds;
+ else
+ return state->vs;
}
void fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *so) assert_dt;
+ const struct ir3_shader_variant *so) assert_dt;
-struct fd_ringbuffer * fd6_program_interp_state(struct fd6_emit *emit) assert_dt;
+struct fd_ringbuffer *fd6_program_interp_state(struct fd6_emit *emit) assert_dt;
void fd6_prog_init(struct pipe_context *pctx);
#include "fd6_query.h"
struct PACKED fd6_query_sample {
- uint64_t start;
- uint64_t result;
- uint64_t stop;
+ uint64_t start;
+ uint64_t result;
+ uint64_t stop;
};
/* offset of a single field of an array of fd6_query_sample: */
-#define query_sample_idx(aq, idx, field) \
- fd_resource((aq)->prsc)->bo, \
- (idx * sizeof(struct fd6_query_sample)) + \
- offsetof(struct fd6_query_sample, field), \
- 0, 0
+#define query_sample_idx(aq, idx, field) \
+ fd_resource((aq)->prsc)->bo, \
+ (idx * sizeof(struct fd6_query_sample)) + \
+ offsetof(struct fd6_query_sample, field), \
+ 0, 0
/* offset of a single field of fd6_query_sample: */
-#define query_sample(aq, field) \
- query_sample_idx(aq, 0, field)
+#define query_sample(aq, field) query_sample_idx(aq, 0, field)
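
For reference, query_sample() simply forwards to query_sample_idx() with index 0, and both expand to the four trailing arguments OUT_RELOC() expects (bo, offset, or-value, shift). A sketch of the expansion of OUT_RELOC(ring, query_sample(aq, result)), under that reading:

   OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,
             (0 * sizeof(struct fd6_query_sample)) +
                offsetof(struct fd6_query_sample, result),
             0, 0);
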
/*
* Occlusion Query:
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_ringbuffer *ring = batch->draw;
- OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
- OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
+ OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
+ OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
- OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
- OUT_RELOC(ring, query_sample(aq, start));
+ OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
+ OUT_RELOC(ring, query_sample(aq, start));
- fd6_event_write(batch, ring, ZPASS_DONE, false);
+ fd6_event_write(batch, ring, ZPASS_DONE, false);
- fd6_context(batch->ctx)->samples_passed_queries++;
+ fd6_context(batch->ctx)->samples_passed_queries++;
}
static void
-occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_ringbuffer *ring = batch->draw;
- OUT_PKT7(ring, CP_MEM_WRITE, 4);
- OUT_RELOC(ring, query_sample(aq, stop));
- OUT_RING(ring, 0xffffffff);
- OUT_RING(ring, 0xffffffff);
+ OUT_PKT7(ring, CP_MEM_WRITE, 4);
+ OUT_RELOC(ring, query_sample(aq, stop));
+ OUT_RING(ring, 0xffffffff);
+ OUT_RING(ring, 0xffffffff);
- OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
+ OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
- OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
- OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
+ OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
+ OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
- OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
- OUT_RELOC(ring, query_sample(aq, stop));
+ OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
+ OUT_RELOC(ring, query_sample(aq, stop));
- fd6_event_write(batch, ring, ZPASS_DONE, false);
+ fd6_event_write(batch, ring, ZPASS_DONE, false);
-   /* To avoid stalling in the draw buffer, emit the code to compute the
-    * counter delta in the epilogue ring.
-    */
- struct fd_ringbuffer *epilogue = fd_batch_get_epilogue(batch);
- fd_wfi(batch, epilogue);
+   /* To avoid stalling in the draw buffer, emit the code to compute the
+    * counter delta in the epilogue ring.
+    */
+ struct fd_ringbuffer *epilogue = fd_batch_get_epilogue(batch);
+ fd_wfi(batch, epilogue);
- /* result += stop - start: */
- OUT_PKT7(epilogue, CP_MEM_TO_MEM, 9);
- OUT_RING(epilogue, CP_MEM_TO_MEM_0_DOUBLE |
- CP_MEM_TO_MEM_0_NEG_C);
- OUT_RELOC(epilogue, query_sample(aq, result)); /* dst */
- OUT_RELOC(epilogue, query_sample(aq, result)); /* srcA */
- OUT_RELOC(epilogue, query_sample(aq, stop)); /* srcB */
- OUT_RELOC(epilogue, query_sample(aq, start)); /* srcC */
+ /* result += stop - start: */
+ OUT_PKT7(epilogue, CP_MEM_TO_MEM, 9);
+ OUT_RING(epilogue, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
+ OUT_RELOC(epilogue, query_sample(aq, result)); /* dst */
+ OUT_RELOC(epilogue, query_sample(aq, result)); /* srcA */
+ OUT_RELOC(epilogue, query_sample(aq, stop)); /* srcB */
+ OUT_RELOC(epilogue, query_sample(aq, start)); /* srcC */
- fd6_context(batch->ctx)->samples_passed_queries--;
+ fd6_context(batch->ctx)->samples_passed_queries--;
}
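
The CP_MEM_TO_MEM packet used for the "result += stop - start" pattern here (and in the later pause hooks) has the GPU command processor compute dst = srcA + srcB - srcC: DOUBLE selects 64-bit operands and NEG_C negates the third source. In C terms, a sketch of what the CP evaluates against the sample buffer (illustration only; the driver never executes this):

   struct fd6_query_sample *s = sample_bo_map;   /* hypothetical mapping of
                                                  * the query's sample BO */
   s->result = s->result + s->stop - s->start;
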
static void
occlusion_counter_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd6_query_sample *sp = buf;
- result->u64 = sp->result;
+ struct fd6_query_sample *sp = buf;
+ result->u64 = sp->result;
}
static void
occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd6_query_sample *sp = buf;
- result->b = !!sp->result;
+ struct fd6_query_sample *sp = buf;
+ result->b = !!sp->result;
}
static const struct fd_acc_sample_provider occlusion_counter = {
- .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
- .size = sizeof(struct fd6_query_sample),
- .resume = occlusion_resume,
- .pause = occlusion_pause,
- .result = occlusion_counter_result,
+ .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
+ .size = sizeof(struct fd6_query_sample),
+ .resume = occlusion_resume,
+ .pause = occlusion_pause,
+ .result = occlusion_counter_result,
};
static const struct fd_acc_sample_provider occlusion_predicate = {
- .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
- .size = sizeof(struct fd6_query_sample),
- .resume = occlusion_resume,
- .pause = occlusion_pause,
- .result = occlusion_predicate_result,
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
+ .size = sizeof(struct fd6_query_sample),
+ .resume = occlusion_resume,
+ .pause = occlusion_pause,
+ .result = occlusion_predicate_result,
};
static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
- .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
- .size = sizeof(struct fd6_query_sample),
- .resume = occlusion_resume,
- .pause = occlusion_pause,
- .result = occlusion_predicate_result,
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
+ .size = sizeof(struct fd6_query_sample),
+ .resume = occlusion_resume,
+ .pause = occlusion_pause,
+ .result = occlusion_predicate_result,
};
/*
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_ringbuffer *ring = batch->draw;
- OUT_PKT7(ring, CP_EVENT_WRITE, 4);
- OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
- CP_EVENT_WRITE_0_TIMESTAMP);
- OUT_RELOC(ring, query_sample(aq, start));
- OUT_RING(ring, 0x00000000);
+ OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+ OUT_RING(ring,
+ CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
+ OUT_RELOC(ring, query_sample(aq, start));
+ OUT_RING(ring, 0x00000000);
- fd_reset_wfi(batch);
+ fd_reset_wfi(batch);
}
static void
-time_elapsed_pause(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+time_elapsed_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->draw;
-
- OUT_PKT7(ring, CP_EVENT_WRITE, 4);
- OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
- CP_EVENT_WRITE_0_TIMESTAMP);
- OUT_RELOC(ring, query_sample(aq, stop));
- OUT_RING(ring, 0x00000000);
-
- fd_reset_wfi(batch);
- fd_wfi(batch, ring);
-
- /* result += stop - start: */
- OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
- OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
- CP_MEM_TO_MEM_0_NEG_C);
- OUT_RELOC(ring, query_sample(aq, result)); /* dst */
- OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
- OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
- OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
+ struct fd_ringbuffer *ring = batch->draw;
+
+ OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+ OUT_RING(ring,
+ CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
+ OUT_RELOC(ring, query_sample(aq, stop));
+ OUT_RING(ring, 0x00000000);
+
+ fd_reset_wfi(batch);
+ fd_wfi(batch, ring);
+
+ /* result += stop - start: */
+ OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
+ OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
+ OUT_RELOC(ring, query_sample(aq, result)); /* dst */
+ OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
+ OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
+ OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
}
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
- /* We captured a timestamp in timestamp_resume(), nothing to do here. */
+ /* We captured a timestamp in timestamp_resume(), nothing to do here. */
}
/* timestamp logging for u_trace: */
static void
record_timestamp(struct fd_ringbuffer *ring, struct fd_bo *bo, unsigned offset)
{
- OUT_PKT7(ring, CP_EVENT_WRITE, 4);
- OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
- CP_EVENT_WRITE_0_TIMESTAMP);
- OUT_RELOC(ring, bo, offset, 0, 0);
- OUT_RING(ring, 0x00000000);
+ OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+ OUT_RING(ring,
+ CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
+ OUT_RELOC(ring, bo, offset, 0, 0);
+ OUT_RING(ring, 0x00000000);
}
static uint64_t
ticks_to_ns(uint64_t ts)
{
- /* This is based on the 19.2MHz always-on rbbm timer.
- *
- * TODO we should probably query this value from kernel..
- */
- return ts * (1000000000 / 19200000);
+ /* This is based on the 19.2MHz always-on rbbm timer.
+ *
+ * TODO we should probably query this value from kernel..
+ */
+ return ts * (1000000000 / 19200000);
}
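
Note that ticks_to_ns() relies on integer division: 1000000000 / 19200000 evaluates to 52, so the conversion is effectively 52ns per 19.2MHz tick. The exact period is ~52.083ns, so the truncation undercounts by roughly 0.16%:

   uint64_t ns = ticks_to_ns(19200000);   /* one second of ticks ->
                                           * 998400000, not 1000000000 */
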
static void
time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd6_query_sample *sp = buf;
- result->u64 = ticks_to_ns(sp->result);
+ struct fd6_query_sample *sp = buf;
+ result->u64 = ticks_to_ns(sp->result);
}
static void
timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd6_query_sample *sp = buf;
- result->u64 = ticks_to_ns(sp->start);
+ struct fd6_query_sample *sp = buf;
+ result->u64 = ticks_to_ns(sp->start);
}
static const struct fd_acc_sample_provider time_elapsed = {
- .query_type = PIPE_QUERY_TIME_ELAPSED,
- .always = true,
- .size = sizeof(struct fd6_query_sample),
- .resume = timestamp_resume,
- .pause = time_elapsed_pause,
- .result = time_elapsed_accumulate_result,
+ .query_type = PIPE_QUERY_TIME_ELAPSED,
+ .always = true,
+ .size = sizeof(struct fd6_query_sample),
+ .resume = timestamp_resume,
+ .pause = time_elapsed_pause,
+ .result = time_elapsed_accumulate_result,
};
/* NOTE: timestamp query isn't going to give terribly sensible results
*/
static const struct fd_acc_sample_provider timestamp = {
- .query_type = PIPE_QUERY_TIMESTAMP,
- .always = true,
- .size = sizeof(struct fd6_query_sample),
- .resume = timestamp_resume,
- .pause = timestamp_pause,
- .result = timestamp_accumulate_result,
+ .query_type = PIPE_QUERY_TIMESTAMP,
+ .always = true,
+ .size = sizeof(struct fd6_query_sample),
+ .resume = timestamp_resume,
+ .pause = timestamp_pause,
+ .result = timestamp_accumulate_result,
};
struct PACKED fd6_primitives_sample {
- struct {
- uint64_t emitted, generated;
- } start[4], stop[4], result;
+ struct {
+ uint64_t emitted, generated;
+ } start[4], stop[4], result;
- uint64_t prim_start[16], prim_stop[16], prim_emitted;
+ uint64_t prim_start[16], prim_stop[16], prim_emitted;
};
-
-#define primitives_relocw(ring, aq, field) \
- OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, field), 0, 0);
-#define primitives_reloc(ring, aq, field) \
- OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, field), 0, 0);
+#define primitives_relocw(ring, aq, field) \
+ OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, \
+ offsetof(struct fd6_primitives_sample, field), 0, 0);
+#define primitives_reloc(ring, aq, field) \
+ OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, \
+ offsetof(struct fd6_primitives_sample, field), 0, 0);
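
Both macros above end in a semicolon of their own, which is why a call site may legally drop its semicolon, as primitives_generated_pause() below does after its prim_stop reloc:

   primitives_reloc(ring, aq, prim_emitted)   /* still a complete statement:
                                               * the ';' comes from the
                                               * macro body */
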
#ifdef DEBUG_COUNTERS
static const unsigned counter_count = 10;
static void
log_counters(struct fd6_primitives_sample *ps)
{
- const char *labels[] = {
- "vs_vertices_in",
- "vs_primitives_out",
- "hs_vertices_in",
- "hs_patches_out",
- "ds_vertices_in",
- "ds_primitives_out",
- "gs_primitives_in",
- "gs_primitives_out",
- "ras_primitives_in",
- "x",
- };
-
- printf(" counter\t\tstart\t\t\tstop\t\t\tdiff\n");
- for (int i = 0; i < ARRAY_SIZE(labels); i++) {
- int register_idx = i + (counter_base - REG_A6XX_RBBM_PRIMCTR_0_LO) / 2;
- printf(" RBBM_PRIMCTR_%d\t0x%016"PRIx64"\t0x%016"PRIx64"\t%"PRIi64"\t%s\n",
- register_idx,
- ps->prim_start[i], ps->prim_stop[i], ps->prim_stop[i] - ps->prim_start[i],
- labels[register_idx]);
- }
-
- printf(" so counts\n");
- for (int i = 0; i < ARRAY_SIZE(ps->start); i++) {
- printf(" CHANNEL %d emitted\t0x%016"PRIx64"\t0x%016"PRIx64"\t%"PRIi64"\n",
- i, ps->start[i].generated, ps->stop[i].generated, ps->stop[i].generated - ps->start[i].generated);
- printf(" CHANNEL %d generated\t0x%016"PRIx64"\t0x%016"PRIx64"\t%"PRIi64"\n",
- i, ps->start[i].emitted, ps->stop[i].emitted, ps->stop[i].emitted - ps->start[i].emitted);
- }
-
- printf("generated %"PRIu64", emitted %"PRIu64"\n", ps->result.generated, ps->result.emitted);
+ const char *labels[] = {
+ "vs_vertices_in", "vs_primitives_out",
+ "hs_vertices_in", "hs_patches_out",
+ "ds_vertices_in", "ds_primitives_out",
+ "gs_primitives_in", "gs_primitives_out",
+ "ras_primitives_in", "x",
+ };
+
+ printf(" counter\t\tstart\t\t\tstop\t\t\tdiff\n");
+ for (int i = 0; i < ARRAY_SIZE(labels); i++) {
+ int register_idx = i + (counter_base - REG_A6XX_RBBM_PRIMCTR_0_LO) / 2;
+ printf(" RBBM_PRIMCTR_%d\t0x%016" PRIx64 "\t0x%016" PRIx64 "\t%" PRIi64
+ "\t%s\n",
+ register_idx, ps->prim_start[i], ps->prim_stop[i],
+ ps->prim_stop[i] - ps->prim_start[i], labels[register_idx]);
+ }
+
+ printf(" so counts\n");
+ for (int i = 0; i < ARRAY_SIZE(ps->start); i++) {
+ printf(" CHANNEL %d emitted\t0x%016" PRIx64 "\t0x%016" PRIx64
+ "\t%" PRIi64 "\n",
+ i, ps->start[i].generated, ps->stop[i].generated,
+ ps->stop[i].generated - ps->start[i].generated);
+ printf(" CHANNEL %d generated\t0x%016" PRIx64 "\t0x%016" PRIx64
+ "\t%" PRIi64 "\n",
+ i, ps->start[i].emitted, ps->stop[i].emitted,
+ ps->stop[i].emitted - ps->start[i].emitted);
+ }
+
+ printf("generated %" PRIu64 ", emitted %" PRIu64 "\n", ps->result.generated,
+ ps->result.emitted);
}
#else
#endif
static void
-primitives_generated_resume(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+primitives_generated_resume(struct fd_acc_query *aq,
+ struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_ringbuffer *ring = batch->draw;
- fd_wfi(batch, ring);
+ fd_wfi(batch, ring);
- OUT_PKT7(ring, CP_REG_TO_MEM, 3);
- OUT_RING(ring, CP_REG_TO_MEM_0_64B |
- CP_REG_TO_MEM_0_CNT(counter_count * 2) |
- CP_REG_TO_MEM_0_REG(counter_base));
- primitives_relocw(ring, aq, prim_start);
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, CP_REG_TO_MEM_0_64B | CP_REG_TO_MEM_0_CNT(counter_count * 2) |
+ CP_REG_TO_MEM_0_REG(counter_base));
+ primitives_relocw(ring, aq, prim_start);
- fd6_event_write(batch, ring, START_PRIMITIVE_CTRS, false);
+ fd6_event_write(batch, ring, START_PRIMITIVE_CTRS, false);
}
static void
-primitives_generated_pause(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+primitives_generated_pause(struct fd_acc_query *aq,
+ struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->draw;
-
- fd_wfi(batch, ring);
-
- /* snapshot the end values: */
- OUT_PKT7(ring, CP_REG_TO_MEM, 3);
- OUT_RING(ring, CP_REG_TO_MEM_0_64B |
- CP_REG_TO_MEM_0_CNT(counter_count * 2) |
- CP_REG_TO_MEM_0_REG(counter_base));
- primitives_relocw(ring, aq, prim_stop);
-
- fd6_event_write(batch, ring, STOP_PRIMITIVE_CTRS, false);
-
- /* result += stop - start: */
- OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
- OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
- CP_MEM_TO_MEM_0_NEG_C | 0x40000000);
- primitives_relocw(ring, aq, result.generated);
- primitives_reloc(ring, aq, prim_emitted);
- primitives_reloc(ring, aq, prim_stop[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2])
- primitives_reloc(ring, aq, prim_start[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2]);
+ struct fd_ringbuffer *ring = batch->draw;
+
+ fd_wfi(batch, ring);
+
+ /* snapshot the end values: */
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, CP_REG_TO_MEM_0_64B | CP_REG_TO_MEM_0_CNT(counter_count * 2) |
+ CP_REG_TO_MEM_0_REG(counter_base));
+ primitives_relocw(ring, aq, prim_stop);
+
+ fd6_event_write(batch, ring, STOP_PRIMITIVE_CTRS, false);
+
+ /* result += stop - start: */
+ OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
+ OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x40000000);
+ primitives_relocw(ring, aq, result.generated);
+ primitives_reloc(ring, aq, prim_emitted);
+ primitives_reloc(ring, aq,
+ prim_stop[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2])
+ primitives_reloc(
+ ring, aq, prim_start[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2]);
}
static void
primitives_generated_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd6_primitives_sample *ps = buf;
+ struct fd6_primitives_sample *ps = buf;
- log_counters(ps);
+ log_counters(ps);
- result->u64 = ps->result.generated;
+ result->u64 = ps->result.generated;
}
static const struct fd_acc_sample_provider primitives_generated = {
- .query_type = PIPE_QUERY_PRIMITIVES_GENERATED,
- .size = sizeof(struct fd6_primitives_sample),
- .resume = primitives_generated_resume,
- .pause = primitives_generated_pause,
- .result = primitives_generated_result,
+ .query_type = PIPE_QUERY_PRIMITIVES_GENERATED,
+ .size = sizeof(struct fd6_primitives_sample),
+ .resume = primitives_generated_resume,
+ .pause = primitives_generated_pause,
+ .result = primitives_generated_result,
};
static void
-primitives_emitted_resume(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+primitives_emitted_resume(struct fd_acc_query *aq,
+ struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_ringbuffer *ring = batch->draw;
- fd_wfi(batch, ring);
- OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
- primitives_relocw(ring, aq, start[0]);
+ fd_wfi(batch, ring);
+ OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
+ primitives_relocw(ring, aq, start[0]);
- fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
+ fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
}
static void
-primitives_emitted_pause(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+primitives_emitted_pause(struct fd_acc_query *aq,
+ struct fd_batch *batch) assert_dt
{
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_ringbuffer *ring = batch->draw;
- fd_wfi(batch, ring);
+ fd_wfi(batch, ring);
- OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
- primitives_relocw(ring, aq, stop[0]);
- fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
+ OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
+ primitives_relocw(ring, aq, stop[0]);
+ fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
- fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
+ fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
- /* result += stop - start: */
- OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
- OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
- CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
- primitives_relocw(ring, aq, result.emitted);
- primitives_reloc(ring, aq, result.emitted);
- primitives_reloc(ring, aq, stop[aq->base.index].emitted);
- primitives_reloc(ring, aq, start[aq->base.index].emitted);
+ /* result += stop - start: */
+ OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
+ OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
+ primitives_relocw(ring, aq, result.emitted);
+ primitives_reloc(ring, aq, result.emitted);
+ primitives_reloc(ring, aq, stop[aq->base.index].emitted);
+ primitives_reloc(ring, aq, start[aq->base.index].emitted);
}
static void
primitives_emitted_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd6_primitives_sample *ps = buf;
+ struct fd6_primitives_sample *ps = buf;
- log_counters(ps);
+ log_counters(ps);
- result->u64 = ps->result.emitted;
+ result->u64 = ps->result.emitted;
}
static const struct fd_acc_sample_provider primitives_emitted = {
- .query_type = PIPE_QUERY_PRIMITIVES_EMITTED,
- .size = sizeof(struct fd6_primitives_sample),
- .resume = primitives_emitted_resume,
- .pause = primitives_emitted_pause,
- .result = primitives_emitted_result,
+ .query_type = PIPE_QUERY_PRIMITIVES_EMITTED,
+ .size = sizeof(struct fd6_primitives_sample),
+ .resume = primitives_emitted_resume,
+ .pause = primitives_emitted_pause,
+ .result = primitives_emitted_result,
};
/*
*/
struct fd_batch_query_entry {
- uint8_t gid; /* group-id */
- uint8_t cid; /* countable-id within the group */
+ uint8_t gid; /* group-id */
+ uint8_t cid; /* countable-id within the group */
};
struct fd_batch_query_data {
- struct fd_screen *screen;
- unsigned num_query_entries;
- struct fd_batch_query_entry query_entries[];
+ struct fd_screen *screen;
+ unsigned num_query_entries;
+ struct fd_batch_query_entry query_entries[];
};
static void
-perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
- struct fd_batch_query_data *data = aq->query_data;
- struct fd_screen *screen = data->screen;
- struct fd_ringbuffer *ring = batch->draw;
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd_screen *screen = data->screen;
+ struct fd_ringbuffer *ring = batch->draw;
- unsigned counters_per_group[screen->num_perfcntr_groups];
- memset(counters_per_group, 0, sizeof(counters_per_group));
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
- fd_wfi(batch, ring);
+ fd_wfi(batch, ring);
- /* configure performance counters for the requested queries: */
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
- unsigned counter_idx = counters_per_group[entry->gid]++;
+ /* configure performance counters for the requested queries: */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
- debug_assert(counter_idx < g->num_counters);
+ debug_assert(counter_idx < g->num_counters);
- OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
- OUT_RING(ring, g->countables[entry->cid].selector);
- }
+ OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
+ OUT_RING(ring, g->countables[entry->cid].selector);
+ }
- memset(counters_per_group, 0, sizeof(counters_per_group));
+ memset(counters_per_group, 0, sizeof(counters_per_group));
- /* and snapshot the start values */
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
- unsigned counter_idx = counters_per_group[entry->gid]++;
- const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+ /* and snapshot the start values */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
+ const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
- OUT_PKT7(ring, CP_REG_TO_MEM, 3);
- OUT_RING(ring, CP_REG_TO_MEM_0_64B |
- CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
- OUT_RELOC(ring, query_sample_idx(aq, i, start));
- }
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, CP_REG_TO_MEM_0_64B |
+ CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
+ OUT_RELOC(ring, query_sample_idx(aq, i, start));
+ }
}
static void
-perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
- struct fd_batch_query_data *data = aq->query_data;
- struct fd_screen *screen = data->screen;
- struct fd_ringbuffer *ring = batch->draw;
-
- unsigned counters_per_group[screen->num_perfcntr_groups];
- memset(counters_per_group, 0, sizeof(counters_per_group));
-
- fd_wfi(batch, ring);
-
- /* TODO do we need to bother to turn anything off? */
-
- /* snapshot the end values: */
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
- unsigned counter_idx = counters_per_group[entry->gid]++;
- const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
-
- OUT_PKT7(ring, CP_REG_TO_MEM, 3);
- OUT_RING(ring, CP_REG_TO_MEM_0_64B |
- CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
- OUT_RELOC(ring, query_sample_idx(aq, i, stop));
- }
-
- /* and compute the result: */
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- /* result += stop - start: */
- OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
- OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
- CP_MEM_TO_MEM_0_NEG_C);
- OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
- OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
- OUT_RELOC(ring, query_sample_idx(aq, i, stop)); /* srcB */
- OUT_RELOC(ring, query_sample_idx(aq, i, start)); /* srcC */
- }
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd_screen *screen = data->screen;
+ struct fd_ringbuffer *ring = batch->draw;
+
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
+
+ fd_wfi(batch, ring);
+
+ /* TODO do we need to bother to turn anything off? */
+
+ /* snapshot the end values: */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+ unsigned counter_idx = counters_per_group[entry->gid]++;
+ const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, CP_REG_TO_MEM_0_64B |
+ CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
+ OUT_RELOC(ring, query_sample_idx(aq, i, stop));
+ }
+
+ /* and compute the result: */
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ /* result += stop - start: */
+ OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
+ OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
+ OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
+ OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
+ OUT_RELOC(ring, query_sample_idx(aq, i, stop)); /* srcB */
+ OUT_RELOC(ring, query_sample_idx(aq, i, start)); /* srcC */
+ }
}
static void
perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result)
+ union pipe_query_result *result)
{
- struct fd_batch_query_data *data = aq->query_data;
- struct fd6_query_sample *sp = buf;
+ struct fd_batch_query_data *data = aq->query_data;
+ struct fd6_query_sample *sp = buf;
- for (unsigned i = 0; i < data->num_query_entries; i++) {
- result->batch[i].u64 = sp[i].result;
- }
+ for (unsigned i = 0; i < data->num_query_entries; i++) {
+ result->batch[i].u64 = sp[i].result;
+ }
}
static const struct fd_acc_sample_provider perfcntr = {
- .query_type = FD_QUERY_FIRST_PERFCNTR,
- .always = true,
- .resume = perfcntr_resume,
- .pause = perfcntr_pause,
- .result = perfcntr_accumulate_result,
+ .query_type = FD_QUERY_FIRST_PERFCNTR,
+ .always = true,
+ .resume = perfcntr_resume,
+ .pause = perfcntr_pause,
+ .result = perfcntr_accumulate_result,
};
static struct pipe_query *
-fd6_create_batch_query(struct pipe_context *pctx,
- unsigned num_queries, unsigned *query_types)
+fd6_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
+ unsigned *query_types)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_screen *screen = ctx->screen;
- struct fd_query *q;
- struct fd_acc_query *aq;
- struct fd_batch_query_data *data;
-
- data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data,
- num_queries * sizeof(data->query_entries[0]));
-
- data->screen = screen;
- data->num_query_entries = num_queries;
-
- /* validate the requested query_types and ensure we don't try
- * to request more query_types of a given group than we have
- * counters:
- */
- unsigned counters_per_group[screen->num_perfcntr_groups];
- memset(counters_per_group, 0, sizeof(counters_per_group));
-
- for (unsigned i = 0; i < num_queries; i++) {
- unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
-
- /* verify valid query_type, ie. is it actually a perfcntr? */
- if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
- (idx >= screen->num_perfcntr_queries)) {
- mesa_loge("invalid batch query query_type: %u", query_types[i]);
- goto error;
- }
-
- struct fd_batch_query_entry *entry = &data->query_entries[i];
- struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
-
- entry->gid = pq->group_id;
-
- /* the perfcntr_queries[] table flattens all the countables
- * for each group in series, ie:
- *
- * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
- *
- * So to find the countable index just step back through the
- * table to find the first entry with the same group-id.
- */
- while (pq > screen->perfcntr_queries) {
- pq--;
- if (pq->group_id == entry->gid)
- entry->cid++;
- }
-
- if (counters_per_group[entry->gid] >=
- screen->perfcntr_groups[entry->gid].num_counters) {
- mesa_loge("too many counters for group %u", entry->gid);
- goto error;
- }
-
- counters_per_group[entry->gid]++;
- }
-
- q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
- aq = fd_acc_query(q);
-
- /* sample buffer size is based on # of queries: */
- aq->size = num_queries * sizeof(struct fd6_query_sample);
- aq->query_data = data;
-
- return (struct pipe_query *)q;
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_screen *screen = ctx->screen;
+ struct fd_query *q;
+ struct fd_acc_query *aq;
+ struct fd_batch_query_data *data;
+
+ data = CALLOC_VARIANT_LENGTH_STRUCT(
+ fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
+
+ data->screen = screen;
+ data->num_query_entries = num_queries;
+
+ /* validate the requested query_types and ensure we don't try
+ * to request more query_types of a given group than we have
+ * counters:
+ */
+ unsigned counters_per_group[screen->num_perfcntr_groups];
+ memset(counters_per_group, 0, sizeof(counters_per_group));
+
+ for (unsigned i = 0; i < num_queries; i++) {
+ unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
+
+ /* verify valid query_type, ie. is it actually a perfcntr? */
+ if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
+ (idx >= screen->num_perfcntr_queries)) {
+ mesa_loge("invalid batch query query_type: %u", query_types[i]);
+ goto error;
+ }
+
+ struct fd_batch_query_entry *entry = &data->query_entries[i];
+ struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
+
+ entry->gid = pq->group_id;
+
+ /* the perfcntr_queries[] table flattens all the countables
+ * for each group in series, ie:
+ *
+ * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
+ *
+ * So to find the countable index just step back through the
+ * table to find the first entry with the same group-id.
+ */
+ while (pq > screen->perfcntr_queries) {
+ pq--;
+ if (pq->group_id == entry->gid)
+ entry->cid++;
+ }
+
+ if (counters_per_group[entry->gid] >=
+ screen->perfcntr_groups[entry->gid].num_counters) {
+ mesa_loge("too many counters for group %u", entry->gid);
+ goto error;
+ }
+
+ counters_per_group[entry->gid]++;
+ }
+
+ q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
+ aq = fd_acc_query(q);
+
+ /* sample buffer size is based on # of queries: */
+ aq->size = num_queries * sizeof(struct fd6_query_sample);
+ aq->query_data = data;
+
+ return (struct pipe_query *)q;
error:
- free(data);
- return NULL;
+ free(data);
+ return NULL;
}
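
A hypothetical usage sketch of the batch-query path (the query-type offsets below are assumed for illustration; real values depend on which perfcntr groups and countables the screen exposes):

   unsigned qtypes[2] = {
      FD_QUERY_FIRST_PERFCNTR + 0,   /* some countable in some group */
      FD_QUERY_FIRST_PERFCNTR + 3,   /* another countable */
   };
   struct pipe_query *q = pctx->create_batch_query(pctx, 2, qtypes);
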
void
-fd6_query_context_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd6_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- ctx->create_query = fd_acc_create_query;
- ctx->query_update_batch = fd_acc_query_update_batch;
+ ctx->create_query = fd_acc_create_query;
+ ctx->query_update_batch = fd_acc_query_update_batch;
- ctx->record_timestamp = record_timestamp;
- ctx->ts_to_ns = ticks_to_ns;
+ ctx->record_timestamp = record_timestamp;
+ ctx->ts_to_ns = ticks_to_ns;
- pctx->create_batch_query = fd6_create_batch_query;
+ pctx->create_batch_query = fd6_create_batch_query;
- fd_acc_query_register_provider(pctx, &occlusion_counter);
- fd_acc_query_register_provider(pctx, &occlusion_predicate);
- fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
+ fd_acc_query_register_provider(pctx, &occlusion_counter);
+ fd_acc_query_register_provider(pctx, &occlusion_predicate);
+ fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
- fd_acc_query_register_provider(pctx, &time_elapsed);
-   fd_acc_query_register_provider(pctx, &timestamp);
+ fd_acc_query_register_provider(pctx, &time_elapsed);
+   fd_acc_query_register_provider(pctx, &timestamp);
- fd_acc_query_register_provider(pctx, &primitives_generated);
- fd_acc_query_register_provider(pctx, &primitives_emitted);
+ fd_acc_query_register_provider(pctx, &primitives_generated);
+ fd_acc_query_register_provider(pctx, &primitives_emitted);
}
* Rob Clark <robclark@freedesktop.org>
*/
-
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd6_rasterizer.h"
#include "fd6_context.h"
#include "fd6_format.h"
#include "fd6_pack.h"
+#include "fd6_rasterizer.h"
struct fd_ringbuffer *
__fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
- const struct pipe_rasterizer_state *cso, bool primitive_restart)
+ const struct pipe_rasterizer_state *cso,
+ bool primitive_restart)
{
- struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 18 * 4);
- float psize_min, psize_max;
-
- if (cso->point_size_per_vertex) {
- psize_min = util_get_min_point_size(cso);
- psize_max = 4092;
- } else {
- /* Force the point size to be as if the vertex output was disabled. */
- psize_min = cso->point_size;
- psize_max = cso->point_size;
- }
-
- OUT_REG(ring,
- A6XX_GRAS_CL_CNTL(
- .znear_clip_disable = !cso->depth_clip_near,
- .zfar_clip_disable = !cso->depth_clip_far,
- .unk5 = !cso->depth_clip_near || !cso->depth_clip_far,
- .vp_clip_code_ignore = 1,
- .zero_gb_scale_z = cso->clip_halfz
- ));
-
- OUT_REG(ring,
- A6XX_GRAS_SU_CNTL(
- .linehalfwidth = cso->line_width / 2.0,
- .poly_offset = cso->offset_tri,
- .msaa_enable = cso->multisample,
- .cull_front = cso->cull_face & PIPE_FACE_FRONT,
- .cull_back = cso->cull_face & PIPE_FACE_BACK,
- .front_cw = !cso->front_ccw,
- ));
-
- OUT_REG(ring,
- A6XX_GRAS_SU_POINT_MINMAX(
- .min = psize_min,
- .max = psize_max,
- ),
- A6XX_GRAS_SU_POINT_SIZE(
- cso->point_size
- ));
-
- OUT_REG(ring,
- A6XX_GRAS_SU_POLY_OFFSET_SCALE(
- cso->offset_scale
- ),
- A6XX_GRAS_SU_POLY_OFFSET_OFFSET(
- cso->offset_units
- ),
- A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(
- cso->offset_clamp
- ));
-
- OUT_REG(ring,
- A6XX_PC_PRIMITIVE_CNTL_0(
- .provoking_vtx_last = !cso->flatshade_first,
- .primitive_restart = primitive_restart,
- ));
-
- enum a6xx_polygon_mode mode = POLYMODE6_TRIANGLES;
- switch (cso->fill_front) {
- case PIPE_POLYGON_MODE_POINT:
- mode = POLYMODE6_POINTS;
- break;
- case PIPE_POLYGON_MODE_LINE:
- mode = POLYMODE6_LINES;
- break;
- default:
- assert(cso->fill_front == PIPE_POLYGON_MODE_FILL);
- break;
- }
-
- OUT_REG(ring, A6XX_VPC_POLYGON_MODE(mode));
- OUT_REG(ring, A6XX_PC_POLYGON_MODE(mode));
-
- return ring;
+ struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 18 * 4);
+ float psize_min, psize_max;
+
+ if (cso->point_size_per_vertex) {
+ psize_min = util_get_min_point_size(cso);
+ psize_max = 4092;
+ } else {
+ /* Force the point size to be as if the vertex output was disabled. */
+ psize_min = cso->point_size;
+ psize_max = cso->point_size;
+ }
+
+ OUT_REG(ring, A6XX_GRAS_CL_CNTL(.znear_clip_disable = !cso->depth_clip_near,
+ .zfar_clip_disable = !cso->depth_clip_far,
+ .unk5 = !cso->depth_clip_near ||
+ !cso->depth_clip_far,
+ .vp_clip_code_ignore = 1,
+ .zero_gb_scale_z = cso->clip_halfz));
+
+ OUT_REG(ring,
+ A6XX_GRAS_SU_CNTL(.linehalfwidth = cso->line_width / 2.0,
+ .poly_offset = cso->offset_tri,
+ .msaa_enable = cso->multisample,
+ .cull_front = cso->cull_face & PIPE_FACE_FRONT,
+ .cull_back = cso->cull_face & PIPE_FACE_BACK,
+ .front_cw = !cso->front_ccw, ));
+
+ OUT_REG(ring,
+ A6XX_GRAS_SU_POINT_MINMAX(.min = psize_min, .max = psize_max, ),
+ A6XX_GRAS_SU_POINT_SIZE(cso->point_size));
+
+ OUT_REG(ring, A6XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale),
+ A6XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units),
+ A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp));
+
+ OUT_REG(ring,
+ A6XX_PC_PRIMITIVE_CNTL_0(.provoking_vtx_last = !cso->flatshade_first,
+ .primitive_restart = primitive_restart, ));
+
+ enum a6xx_polygon_mode mode = POLYMODE6_TRIANGLES;
+ switch (cso->fill_front) {
+ case PIPE_POLYGON_MODE_POINT:
+ mode = POLYMODE6_POINTS;
+ break;
+ case PIPE_POLYGON_MODE_LINE:
+ mode = POLYMODE6_LINES;
+ break;
+ default:
+ assert(cso->fill_front == PIPE_POLYGON_MODE_FILL);
+ break;
+ }
+
+ OUT_REG(ring, A6XX_VPC_POLYGON_MODE(mode));
+ OUT_REG(ring, A6XX_PC_POLYGON_MODE(mode));
+
+ return ring;
}
void *
fd6_rasterizer_state_create(struct pipe_context *pctx,
- const struct pipe_rasterizer_state *cso)
+ const struct pipe_rasterizer_state *cso)
{
- struct fd6_rasterizer_stateobj *so;
+ struct fd6_rasterizer_stateobj *so;
- so = CALLOC_STRUCT(fd6_rasterizer_stateobj);
- if (!so)
- return NULL;
+ so = CALLOC_STRUCT(fd6_rasterizer_stateobj);
+ if (!so)
+ return NULL;
- so->base = *cso;
+ so->base = *cso;
- return so;
+ return so;
}
void
fd6_rasterizer_state_delete(struct pipe_context *pctx, void *hwcso)
{
- struct fd6_rasterizer_stateobj *so = hwcso;
+ struct fd6_rasterizer_stateobj *so = hwcso;
- for (unsigned i = 0; i < ARRAY_SIZE(so->stateobjs); i++)
- if (so->stateobjs[i])
- fd_ringbuffer_del(so->stateobjs[i]);
+ for (unsigned i = 0; i < ARRAY_SIZE(so->stateobjs); i++)
+ if (so->stateobjs[i])
+ fd_ringbuffer_del(so->stateobjs[i]);
- FREE(hwcso);
+ FREE(hwcso);
}
-
#ifndef FD6_RASTERIZER_H_
#define FD6_RASTERIZER_H_
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "freedreno_context.h"
struct fd6_rasterizer_stateobj {
- struct pipe_rasterizer_state base;
+ struct pipe_rasterizer_state base;
- struct fd_ringbuffer *stateobjs[2];
+ struct fd_ringbuffer *stateobjs[2];
};
static inline struct fd6_rasterizer_stateobj *
fd6_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
- return (struct fd6_rasterizer_stateobj *)rast;
+ return (struct fd6_rasterizer_stateobj *)rast;
}
-void * fd6_rasterizer_state_create(struct pipe_context *pctx,
- const struct pipe_rasterizer_state *cso);
+void *fd6_rasterizer_state_create(struct pipe_context *pctx,
+ const struct pipe_rasterizer_state *cso);
void fd6_rasterizer_state_delete(struct pipe_context *, void *hwcso);
-struct fd_ringbuffer * __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
- const struct pipe_rasterizer_state *cso, bool primitive_restart);
+struct fd_ringbuffer *
+__fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
+ const struct pipe_rasterizer_state *cso,
+ bool primitive_restart);
static inline struct fd_ringbuffer *
-fd6_rasterizer_state(struct fd_context *ctx, bool primitive_restart)
- assert_dt
+fd6_rasterizer_state(struct fd_context *ctx, bool primitive_restart) assert_dt
{
- struct fd6_rasterizer_stateobj *rasterizer = fd6_rasterizer_stateobj(ctx->rasterizer);
- unsigned variant = primitive_restart;
+ struct fd6_rasterizer_stateobj *rasterizer =
+ fd6_rasterizer_stateobj(ctx->rasterizer);
+ unsigned variant = primitive_restart;
- if (unlikely(!rasterizer->stateobjs[variant])) {
- rasterizer->stateobjs[variant] =
- __fd6_setup_rasterizer_stateobj(ctx, ctx->rasterizer, primitive_restart);
- }
+ if (unlikely(!rasterizer->stateobjs[variant])) {
+ rasterizer->stateobjs[variant] = __fd6_setup_rasterizer_stateobj(
+ ctx, ctx->rasterizer, primitive_restart);
+ }
- return rasterizer->stateobjs[variant];
+ return rasterizer->stateobjs[variant];
}
#endif /* FD6_RASTERIZER_H_ */
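
fd6_rasterizer_state() lazily builds one pre-baked ring per variant, with the primitive_restart bool doubling as the index into stateobjs[2] (variant 0: restart off, variant 1: restart on). A hedged usage sketch, assuming a struct pipe_draw_info *info is in scope:

   struct fd_ringbuffer *ring =
      fd6_rasterizer_state(ctx, info->primitive_restart);
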
#include "drm-uapi/drm_fourcc.h"
-#include "fd6_resource.h"
#include "fd6_format.h"
+#include "fd6_resource.h"
#include "a6xx.xml.h"
static bool
ok_ubwc_format(struct pipe_screen *pscreen, enum pipe_format pfmt)
{
- switch (pfmt) {
- case PIPE_FORMAT_X24S8_UINT:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- /* We can't sample stencil with UBWC on a630, and we may need to be able
- * to sample stencil at some point. We can't just use
- * fd_resource_uncompress() at the point of stencil sampling because
- * that itself uses stencil sampling in the fd_blitter_blit path.
- */
- return fd_screen(pscreen)->info.a6xx.has_z24uint_s8uint;
-
- case PIPE_FORMAT_R8_G8B8_420_UNORM:
- return true;
-
- default:
- break;
- }
-
- switch (fd6_pipe2color(pfmt)) {
- case FMT6_10_10_10_2_UINT:
- case FMT6_10_10_10_2_UNORM_DEST:
- case FMT6_11_11_10_FLOAT:
- case FMT6_16_FLOAT:
- case FMT6_16_16_16_16_FLOAT:
- case FMT6_16_16_16_16_SINT:
- case FMT6_16_16_16_16_UINT:
- case FMT6_16_16_FLOAT:
- case FMT6_16_16_SINT:
- case FMT6_16_16_UINT:
- case FMT6_16_SINT:
- case FMT6_16_UINT:
- case FMT6_32_32_32_32_SINT:
- case FMT6_32_32_32_32_UINT:
- case FMT6_32_32_SINT:
- case FMT6_32_32_UINT:
- case FMT6_5_6_5_UNORM:
- case FMT6_8_8_8_8_SINT:
- case FMT6_8_8_8_8_UINT:
- case FMT6_8_8_8_8_UNORM:
- case FMT6_8_8_8_X8_UNORM:
- case FMT6_8_8_SINT:
- case FMT6_8_8_UINT:
- case FMT6_8_8_UNORM:
- case FMT6_8_UNORM:
- case FMT6_Z24_UNORM_S8_UINT:
- case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
- return true;
- default:
- return false;
- }
+ switch (pfmt) {
+ case PIPE_FORMAT_X24S8_UINT:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ /* We can't sample stencil with UBWC on a630, and we may need to be able
+ * to sample stencil at some point. We can't just use
+ * fd_resource_uncompress() at the point of stencil sampling because
+ * that itself uses stencil sampling in the fd_blitter_blit path.
+ */
+ return fd_screen(pscreen)->info.a6xx.has_z24uint_s8uint;
+
+ case PIPE_FORMAT_R8_G8B8_420_UNORM:
+ return true;
+
+ default:
+ break;
+ }
+
+ switch (fd6_pipe2color(pfmt)) {
+ case FMT6_10_10_10_2_UINT:
+ case FMT6_10_10_10_2_UNORM_DEST:
+ case FMT6_11_11_10_FLOAT:
+ case FMT6_16_FLOAT:
+ case FMT6_16_16_16_16_FLOAT:
+ case FMT6_16_16_16_16_SINT:
+ case FMT6_16_16_16_16_UINT:
+ case FMT6_16_16_FLOAT:
+ case FMT6_16_16_SINT:
+ case FMT6_16_16_UINT:
+ case FMT6_16_SINT:
+ case FMT6_16_UINT:
+ case FMT6_32_32_32_32_SINT:
+ case FMT6_32_32_32_32_UINT:
+ case FMT6_32_32_SINT:
+ case FMT6_32_32_UINT:
+ case FMT6_5_6_5_UNORM:
+ case FMT6_8_8_8_8_SINT:
+ case FMT6_8_8_8_8_UINT:
+ case FMT6_8_8_8_8_UNORM:
+ case FMT6_8_8_8_X8_UNORM:
+ case FMT6_8_8_SINT:
+ case FMT6_8_8_UINT:
+ case FMT6_8_8_UNORM:
+ case FMT6_8_UNORM:
+ case FMT6_Z24_UNORM_S8_UINT:
+ case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
+ return true;
+ default:
+ return false;
+ }
}
static bool
can_do_ubwc(struct pipe_resource *prsc)
{
- /* limit things to simple single level 2d for now: */
- if ((prsc->depth0 != 1) || (prsc->array_size != 1) || (prsc->last_level != 0))
- return false;
- if (prsc->target != PIPE_TEXTURE_2D)
- return false;
- if (!ok_ubwc_format(prsc->screen, prsc->format))
- return false;
- return true;
+ /* limit things to simple single level 2d for now: */
+ if ((prsc->depth0 != 1) || (prsc->array_size != 1) ||
+ (prsc->last_level != 0))
+ return false;
+ if (prsc->target != PIPE_TEXTURE_2D)
+ return false;
+ if (!ok_ubwc_format(prsc->screen, prsc->format))
+ return false;
+ return true;
}
/**
*/
void
fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc,
- enum pipe_format format)
+ enum pipe_format format)
{
- tc_assert_driver_thread(ctx->tc);
+ tc_assert_driver_thread(ctx->tc);
- if (!rsc->layout.ubwc)
- return;
+ if (!rsc->layout.ubwc)
+ return;
- if (ok_ubwc_format(rsc->b.b.screen, format))
- return;
+ if (ok_ubwc_format(rsc->b.b.screen, format))
+ return;
- perf_debug_ctx(ctx, "%"PRSC_FMT": demoted to uncompressed due to use as %s",
- PRSC_ARGS(&rsc->b.b), util_format_short_name(format));
+ perf_debug_ctx(ctx,
+ "%" PRSC_FMT ": demoted to uncompressed due to use as %s",
+ PRSC_ARGS(&rsc->b.b), util_format_short_name(format));
- fd_resource_uncompress(ctx, rsc);
+ fd_resource_uncompress(ctx, rsc);
}
static void
setup_lrz(struct fd_resource *rsc)
{
- struct fd_screen *screen = fd_screen(rsc->b.b.screen);
- const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
- DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
- unsigned width0 = rsc->b.b.width0;
- unsigned height0 = rsc->b.b.height0;
-
- /* LRZ buffer is super-sampled: */
- switch (rsc->b.b.nr_samples) {
- case 4:
- width0 *= 2;
- FALLTHROUGH;
- case 2:
- height0 *= 2;
- }
-
- unsigned lrz_pitch = align(DIV_ROUND_UP(width0, 8), 32);
- unsigned lrz_height = align(DIV_ROUND_UP(height0, 8), 16);
-
- unsigned size = lrz_pitch * lrz_height * 2;
-
- rsc->lrz_height = lrz_height;
- rsc->lrz_width = lrz_pitch;
- rsc->lrz_pitch = lrz_pitch;
- rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz");
+ struct fd_screen *screen = fd_screen(rsc->b.b.screen);
+ const uint32_t flags =
+ DRM_FREEDRENO_GEM_CACHE_WCOMBINE | DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
+ unsigned width0 = rsc->b.b.width0;
+ unsigned height0 = rsc->b.b.height0;
+
+ /* LRZ buffer is super-sampled: */
+ switch (rsc->b.b.nr_samples) {
+ case 4:
+ width0 *= 2;
+ FALLTHROUGH;
+ case 2:
+ height0 *= 2;
+ }
+
+ unsigned lrz_pitch = align(DIV_ROUND_UP(width0, 8), 32);
+ unsigned lrz_height = align(DIV_ROUND_UP(height0, 8), 16);
+
+ unsigned size = lrz_pitch * lrz_height * 2;
+
+ rsc->lrz_height = lrz_height;
+ rsc->lrz_width = lrz_pitch;
+ rsc->lrz_pitch = lrz_pitch;
+ rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz");
}
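
A worked example of the LRZ sizing above, assuming a single-sampled 1920x1080 surface: lrz_pitch = align(DIV_ROUND_UP(1920, 8), 32) = align(240, 32) = 256, lrz_height = align(DIV_ROUND_UP(1080, 8), 16) = align(135, 16) = 144, and size = 256 * 144 * 2 = 73728 bytes.
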
static uint32_t
fd6_setup_slices(struct fd_resource *rsc)
{
- struct pipe_resource *prsc = &rsc->b.b;
+ struct pipe_resource *prsc = &rsc->b.b;
- if (!FD_DBG(NOLRZ) && has_depth(rsc->b.b.format))
- setup_lrz(rsc);
+ if (!FD_DBG(NOLRZ) && has_depth(rsc->b.b.format))
+ setup_lrz(rsc);
- if (rsc->layout.ubwc && !ok_ubwc_format(rsc->b.b.screen, rsc->b.b.format))
- rsc->layout.ubwc = false;
+ if (rsc->layout.ubwc && !ok_ubwc_format(rsc->b.b.screen, rsc->b.b.format))
+ rsc->layout.ubwc = false;
- fdl6_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
- prsc->width0, prsc->height0, prsc->depth0,
- prsc->last_level + 1, prsc->array_size,
- prsc->target == PIPE_TEXTURE_3D,
- NULL);
+ fdl6_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
+ prsc->width0, prsc->height0, prsc->depth0, prsc->last_level + 1,
+ prsc->array_size, prsc->target == PIPE_TEXTURE_3D, NULL);
- return rsc->layout.size;
+ return rsc->layout.size;
}
static int
fill_ubwc_buffer_sizes(struct fd_resource *rsc)
{
- struct pipe_resource *prsc = &rsc->b.b;
- struct fdl_explicit_layout explicit = {
- .offset = rsc->layout.slices[0].offset,
- .pitch = rsc->layout.pitch0,
- };
+ struct pipe_resource *prsc = &rsc->b.b;
+ struct fdl_explicit_layout explicit = {
+ .offset = rsc->layout.slices[0].offset,
+ .pitch = rsc->layout.pitch0,
+ };
- if (!can_do_ubwc(prsc))
- return -1;
+ if (!can_do_ubwc(prsc))
+ return -1;
- rsc->layout.ubwc = true;
- rsc->layout.tile_mode = TILE6_3;
+ rsc->layout.ubwc = true;
+ rsc->layout.tile_mode = TILE6_3;
- if (!fdl6_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
- prsc->width0, prsc->height0, prsc->depth0,
- prsc->last_level + 1, prsc->array_size, false, &explicit))
- return -1;
+ if (!fdl6_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
+ prsc->width0, prsc->height0, prsc->depth0,
+ prsc->last_level + 1, prsc->array_size, false, &explicit))
+ return -1;
- if (rsc->layout.size > fd_bo_size(rsc->bo))
- return -1;
+ if (rsc->layout.size > fd_bo_size(rsc->bo))
+ return -1;
- return 0;
+ return 0;
}
static int
fd6_layout_resource_for_modifier(struct fd_resource *rsc, uint64_t modifier)
{
- switch (modifier) {
- case DRM_FORMAT_MOD_QCOM_COMPRESSED:
- return fill_ubwc_buffer_sizes(rsc);
- case DRM_FORMAT_MOD_LINEAR:
- if (can_do_ubwc(&rsc->b.b)) {
- perf_debug("%"PRSC_FMT": not UBWC: imported with DRM_FORMAT_MOD_LINEAR!",
- PRSC_ARGS(&rsc->b.b));
- }
- return 0;
- case DRM_FORMAT_MOD_INVALID:
- if (can_do_ubwc(&rsc->b.b)) {
- perf_debug("%"PRSC_FMT": not UBWC: imported with DRM_FORMAT_MOD_INVALID!",
- PRSC_ARGS(&rsc->b.b));
- }
- return 0;
- default:
- return -1;
- }
+ switch (modifier) {
+ case DRM_FORMAT_MOD_QCOM_COMPRESSED:
+ return fill_ubwc_buffer_sizes(rsc);
+ case DRM_FORMAT_MOD_LINEAR:
+ if (can_do_ubwc(&rsc->b.b)) {
+ perf_debug("%" PRSC_FMT
+ ": not UBWC: imported with DRM_FORMAT_MOD_LINEAR!",
+ PRSC_ARGS(&rsc->b.b));
+ }
+ return 0;
+ case DRM_FORMAT_MOD_INVALID:
+ if (can_do_ubwc(&rsc->b.b)) {
+ perf_debug("%" PRSC_FMT
+ ": not UBWC: imported with DRM_FORMAT_MOD_INVALID!",
+ PRSC_ARGS(&rsc->b.b));
+ }
+ return 0;
+ default:
+ return -1;
+ }
}
static const uint64_t supported_modifiers[] = {
- DRM_FORMAT_MOD_LINEAR,
- DRM_FORMAT_MOD_QCOM_COMPRESSED,
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_QCOM_COMPRESSED,
};
void
fd6_resource_screen_init(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- screen->setup_slices = fd6_setup_slices;
- screen->layout_resource_for_modifier = fd6_layout_resource_for_modifier;
- screen->supported_modifiers = supported_modifiers;
- screen->num_supported_modifiers = ARRAY_SIZE(supported_modifiers);
+ screen->setup_slices = fd6_setup_slices;
+ screen->layout_resource_for_modifier = fd6_layout_resource_for_modifier;
+ screen->supported_modifiers = supported_modifiers;
+ screen->num_supported_modifiers = ARRAY_SIZE(supported_modifiers);
}
#include "freedreno_resource.h"
void fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc,
- enum pipe_format format) assert_dt;
-void fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,
- int level, int layer);
+ enum pipe_format format) assert_dt;
+void fd6_emit_flag_reference(struct fd_ringbuffer *ring,
+ struct fd_resource *rsc, int level, int layer);
void fd6_resource_screen_init(struct pipe_screen *pscreen);
#endif /* FD6_RESOURCE_H_ */
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
-#include "fd6_screen.h"
#include "fd6_blitter.h"
#include "fd6_context.h"
#include "fd6_emit.h"
#include "fd6_format.h"
#include "fd6_resource.h"
+#include "fd6_screen.h"
#include "ir3/ir3_compiler.h"
static bool
valid_sample_count(unsigned sample_count)
{
- switch (sample_count) {
- case 0:
- case 1:
- case 2:
- case 4:
-// TODO seems 8x works, but increases lrz width or height.. but the
-// blob I have doesn't seem to expose any egl configs w/ 8x, so
-// just hide it for now and revisit later.
-// case 8:
- return true;
- default:
- return false;
- }
+ switch (sample_count) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ // TODO seems 8x works, but increases lrz width or height.. but the
+ // blob I have doesn't seem to expose any egl configs w/ 8x, so
+ // just hide it for now and revisit later.
+ // case 8:
+ return true;
+ default:
+ return false;
+ }
}
static bool
fd6_screen_is_format_supported(struct pipe_screen *pscreen,
- enum pipe_format format,
- enum pipe_texture_target target,
- unsigned sample_count,
- unsigned storage_sample_count,
- unsigned usage)
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned storage_sample_count, unsigned usage)
{
- unsigned retval = 0;
-
- if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
- !valid_sample_count(sample_count)) {
- DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
- util_format_name(format), target, sample_count, usage);
- return false;
- }
-
- if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
- return false;
-
- if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
- (fd6_pipe2vtx(format) != FMT6_NONE)) {
- retval |= PIPE_BIND_VERTEX_BUFFER;
- }
-
- if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) &&
- (fd6_pipe2tex(format) != FMT6_NONE) &&
- (target == PIPE_BUFFER ||
- util_format_get_blocksize(format) != 12)) {
- retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE);
- }
-
- if ((usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED |
- PIPE_BIND_COMPUTE_RESOURCE)) &&
- (fd6_pipe2color(format) != FMT6_NONE) &&
- (fd6_pipe2tex(format) != FMT6_NONE)) {
- retval |= usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED |
- PIPE_BIND_COMPUTE_RESOURCE);
- }
-
- /* For ARB_framebuffer_no_attachments: */
- if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
- retval |= usage & PIPE_BIND_RENDER_TARGET;
- }
-
- if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
- (fd6_pipe2depth(format) != (enum a6xx_depth_format)~0) &&
- (fd6_pipe2tex(format) != FMT6_NONE)) {
- retval |= PIPE_BIND_DEPTH_STENCIL;
- }
-
- if ((usage & PIPE_BIND_INDEX_BUFFER) &&
- (fd_pipe2index(format) != (enum pc_di_index_size)~0)) {
- retval |= PIPE_BIND_INDEX_BUFFER;
- }
-
- if (retval != usage) {
- DBG("not supported: format=%s, target=%d, sample_count=%d, "
- "usage=%x, retval=%x", util_format_name(format),
- target, sample_count, usage, retval);
- }
-
- return retval == usage;
+ unsigned retval = 0;
+
+ if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+ !valid_sample_count(sample_count)) {
+ DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+ util_format_name(format), target, sample_count, usage);
+ return false;
+ }
+
+ if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
+ return false;
+
+ if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
+ (fd6_pipe2vtx(format) != FMT6_NONE)) {
+ retval |= PIPE_BIND_VERTEX_BUFFER;
+ }
+
+ if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) &&
+ (fd6_pipe2tex(format) != FMT6_NONE) &&
+ (target == PIPE_BUFFER || util_format_get_blocksize(format) != 12)) {
+ retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE);
+ }
+
+ if ((usage &
+ (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_COMPUTE_RESOURCE)) &&
+ (fd6_pipe2color(format) != FMT6_NONE) &&
+ (fd6_pipe2tex(format) != FMT6_NONE)) {
+ retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT | PIPE_BIND_SHARED |
+ PIPE_BIND_COMPUTE_RESOURCE);
+ }
+
+ /* For ARB_framebuffer_no_attachments: */
+ if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
+ retval |= usage & PIPE_BIND_RENDER_TARGET;
+ }
+
+ if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+ (fd6_pipe2depth(format) != (enum a6xx_depth_format) ~0) &&
+ (fd6_pipe2tex(format) != FMT6_NONE)) {
+ retval |= PIPE_BIND_DEPTH_STENCIL;
+ }
+
+ if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+ (fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
+ retval |= PIPE_BIND_INDEX_BUFFER;
+ }
+
+ if (retval != usage) {
+ DBG("not supported: format=%s, target=%d, sample_count=%d, "
+ "usage=%x, retval=%x",
+ util_format_name(format), target, sample_count, usage, retval);
+ }
+
+ return retval == usage;
}
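/* The function above follows an accumulate-and-compare idiom: every requested
 * PIPE_BIND_* bit must be explicitly matched into retval, so a single
 * unsupported bind flag fails the whole query.  A minimal sketch of the same
 * idiom (all names here are illustrative, not driver API):
 */
#include <stdbool.h>
#include <stdint.h>

static bool
all_usages_supported(uint32_t requested, uint32_t hw_supported)
{
   uint32_t retval = requested & hw_supported; /* bits we can honor */
   return retval == requested;                 /* every bit matched? */
}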
void
fd6_screen_init(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- screen->max_rts = A6XX_MAX_RENDER_TARGETS;
+ screen->max_rts = A6XX_MAX_RENDER_TARGETS;
- /* Currently only FB_READ forces GMEM path, mostly because we'd have to
- * deal with cmdstream patching otherwise..
- */
- screen->gmem_reason_mask = FD_GMEM_CLEARS_DEPTH_STENCIL |
- FD_GMEM_DEPTH_ENABLED | FD_GMEM_STENCIL_ENABLED |
- FD_GMEM_BLEND_ENABLED | FD_GMEM_LOGICOP_ENABLED;
+ /* Currently only FB_READ forces GMEM path, mostly because we'd have to
+ * deal with cmdstream patching otherwise..
+ */
+ screen->gmem_reason_mask = FD_GMEM_CLEARS_DEPTH_STENCIL |
+ FD_GMEM_DEPTH_ENABLED | FD_GMEM_STENCIL_ENABLED |
+ FD_GMEM_BLEND_ENABLED | FD_GMEM_LOGICOP_ENABLED;
- pscreen->context_create = fd6_context_create;
- pscreen->is_format_supported = fd6_screen_is_format_supported;
+ pscreen->context_create = fd6_context_create;
+ pscreen->is_format_supported = fd6_screen_is_format_supported;
- screen->tile_mode = fd6_tile_mode;
+ screen->tile_mode = fd6_tile_mode;
- fd6_resource_screen_init(pscreen);
- fd6_emit_init_screen(pscreen);
- ir3_screen_init(pscreen);
+ fd6_resource_screen_init(pscreen);
+ fd6_emit_init_screen(pscreen);
+ ir3_screen_init(pscreen);
}
*/
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "util/hash_table.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd6_texture.h"
-#include "fd6_resource.h"
-#include "fd6_format.h"
#include "fd6_emit.h"
+#include "fd6_format.h"
+#include "fd6_resource.h"
+#include "fd6_texture.h"
static void
remove_tex_entry(struct fd6_context *fd6_ctx, struct hash_entry *entry)
{
- struct fd6_texture_state *tex = entry->data;
- _mesa_hash_table_remove(fd6_ctx->tex_cache, entry);
- fd6_texture_state_reference(&tex, NULL);
+ struct fd6_texture_state *tex = entry->data;
+ _mesa_hash_table_remove(fd6_ctx->tex_cache, entry);
+ fd6_texture_state_reference(&tex, NULL);
}
static enum a6xx_tex_clamp
tex_clamp(unsigned wrap, bool *needs_border)
{
- switch (wrap) {
- case PIPE_TEX_WRAP_REPEAT:
- return A6XX_TEX_REPEAT;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return A6XX_TEX_CLAMP_TO_EDGE;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- *needs_border = true;
- return A6XX_TEX_CLAMP_TO_BORDER;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- /* only works for PoT.. need to emulate otherwise! */
- return A6XX_TEX_MIRROR_CLAMP;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- return A6XX_TEX_MIRROR_REPEAT;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- /* these two we could perhaps emulate, but we currently
- * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
- */
- default:
- DBG("invalid wrap: %u", wrap);
- return 0;
- }
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return A6XX_TEX_REPEAT;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return A6XX_TEX_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ *needs_border = true;
+ return A6XX_TEX_CLAMP_TO_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ /* only works for PoT.. need to emulate otherwise! */
+ return A6XX_TEX_MIRROR_CLAMP;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return A6XX_TEX_MIRROR_REPEAT;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ /* these two we could perhaps emulate, but we currently
+ * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
+ */
+ default:
+ DBG("invalid wrap: %u", wrap);
+ return 0;
+ }
}
static enum a6xx_tex_filter
tex_filter(unsigned filter, bool aniso)
{
- switch (filter) {
- case PIPE_TEX_FILTER_NEAREST:
- return A6XX_TEX_NEAREST;
- case PIPE_TEX_FILTER_LINEAR:
- return aniso ? A6XX_TEX_ANISO : A6XX_TEX_LINEAR;
- default:
- DBG("invalid filter: %u", filter);
- return 0;
- }
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ return A6XX_TEX_NEAREST;
+ case PIPE_TEX_FILTER_LINEAR:
+ return aniso ? A6XX_TEX_ANISO : A6XX_TEX_LINEAR;
+ default:
+ DBG("invalid filter: %u", filter);
+ return 0;
+ }
}
static void *
fd6_sampler_state_create(struct pipe_context *pctx,
- const struct pipe_sampler_state *cso)
+ const struct pipe_sampler_state *cso)
{
- struct fd6_sampler_stateobj *so = CALLOC_STRUCT(fd6_sampler_stateobj);
- unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
- bool miplinear = false;
-
- if (!so)
- return NULL;
-
- so->base = *cso;
- so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno;
-
- if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
- miplinear = true;
-
- so->needs_border = false;
- so->texsamp0 =
- COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
- A6XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
- A6XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
- A6XX_TEX_SAMP_0_ANISO(aniso) |
- A6XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
- A6XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
- A6XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
-
- so->texsamp1 =
- COND(cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE,
- A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
- COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
- COND(!cso->normalized_coords, A6XX_TEX_SAMP_1_UNNORM_COORDS);
-
- so->texsamp0 |= A6XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
- so->texsamp1 |=
- A6XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
- A6XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
-
- if (cso->compare_mode)
- so->texsamp1 |= A6XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
-
- return so;
+ struct fd6_sampler_stateobj *so = CALLOC_STRUCT(fd6_sampler_stateobj);
+ unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
+ bool miplinear = false;
+
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+ so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno;
+
+ if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
+ miplinear = true;
+
+ so->needs_border = false;
+ so->texsamp0 =
+ COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
+ A6XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
+ A6XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
+ A6XX_TEX_SAMP_0_ANISO(aniso) |
+ A6XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
+ A6XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
+ A6XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
+
+ so->texsamp1 =
+ COND(cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE,
+ A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
+ COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
+ COND(!cso->normalized_coords, A6XX_TEX_SAMP_1_UNNORM_COORDS);
+
+ so->texsamp0 |= A6XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
+ so->texsamp1 |= A6XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
+ A6XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
+
+ if (cso->compare_mode)
+ so->texsamp1 |=
+ A6XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
+
+ return so;
}
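/* The aniso expression above packs max_anisotropy into a log2 hw level.  A
 * standalone sketch (illustrative; last_bit() stands in for Mesa's
 * util_last_bit(), the 1-based index of the highest set bit):
 */
#include <stdio.h>

static unsigned
last_bit(unsigned v)
{
   unsigned n = 0;
   while (v) {
      n++;
      v >>= 1;
   }
   return n;
}

int
main(void)
{
   /* prints: 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x -> 4 */
   for (unsigned max_aniso = 1; max_aniso <= 16; max_aniso *= 2) {
      unsigned clamped = max_aniso >> 1;
      if (clamped > 8)
         clamped = 8;
      printf("%2ux -> %u\n", max_aniso, last_bit(clamped));
   }
   return 0;
}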
static void
fd6_sampler_state_delete(struct pipe_context *pctx, void *hwcso)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd6_context *fd6_ctx = fd6_context(ctx);
- struct fd6_sampler_stateobj *samp = hwcso;
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct fd6_sampler_stateobj *samp = hwcso;
- fd_screen_lock(ctx->screen);
+ fd_screen_lock(ctx->screen);
- hash_table_foreach(fd6_ctx->tex_cache, entry) {
- struct fd6_texture_state *state = entry->data;
+ hash_table_foreach(fd6_ctx->tex_cache, entry)
+ {
+ struct fd6_texture_state *state = entry->data;
- for (unsigned i = 0; i < ARRAY_SIZE(state->key.samp); i++) {
- if (samp->seqno == state->key.samp[i].seqno) {
- remove_tex_entry(fd6_ctx, entry);
- break;
- }
- }
- }
+ for (unsigned i = 0; i < ARRAY_SIZE(state->key.samp); i++) {
+ if (samp->seqno == state->key.samp[i].seqno) {
+ remove_tex_entry(fd6_ctx, entry);
+ break;
+ }
+ }
+ }
- fd_screen_unlock(ctx->screen);
+ fd_screen_unlock(ctx->screen);
- free(hwcso);
+ free(hwcso);
}
static struct pipe_sampler_view *
fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
- const struct pipe_sampler_view *cso)
+ const struct pipe_sampler_view *cso)
{
- struct fd6_pipe_sampler_view *so = CALLOC_STRUCT(fd6_pipe_sampler_view);
+ struct fd6_pipe_sampler_view *so = CALLOC_STRUCT(fd6_pipe_sampler_view);
- if (!so)
- return NULL;
+ if (!so)
+ return NULL;
- so->base = *cso;
- pipe_reference(NULL, &prsc->reference);
- so->base.texture = prsc;
- so->base.reference.count = 1;
- so->base.context = pctx;
- so->needs_validate = true;
+ so->base = *cso;
+ pipe_reference(NULL, &prsc->reference);
+ so->base.texture = prsc;
+ so->base.reference.count = 1;
+ so->base.context = pctx;
+ so->needs_validate = true;
- return &so->base;
+ return &so->base;
}
static void
fd6_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
- unsigned start, unsigned nr, unsigned unbind_num_trailing_slots,
- struct pipe_sampler_view **views)
- in_dt
+ unsigned start, unsigned nr,
+ unsigned unbind_num_trailing_slots,
+ struct pipe_sampler_view **views) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots, views);
+ fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots,
+ views);
- if (!views)
- return;
+ if (!views)
+ return;
- for (unsigned i = 0; i < nr; i++) {
- struct fd6_pipe_sampler_view *so = fd6_pipe_sampler_view(views[i]);
+ for (unsigned i = 0; i < nr; i++) {
+ struct fd6_pipe_sampler_view *so = fd6_pipe_sampler_view(views[i]);
- if (!(so && so->needs_validate))
- continue;
+ if (!(so && so->needs_validate))
+ continue;
- struct fd_resource *rsc = fd_resource(so->base.texture);
+ struct fd_resource *rsc = fd_resource(so->base.texture);
- fd6_validate_format(ctx, rsc, so->base.format);
- fd6_sampler_view_update(ctx, so);
+ fd6_validate_format(ctx, rsc, so->base.format);
+ fd6_sampler_view_update(ctx, so);
- so->needs_validate = false;
- }
+ so->needs_validate = false;
+ }
}
void
-fd6_sampler_view_update(struct fd_context *ctx, struct fd6_pipe_sampler_view *so)
+fd6_sampler_view_update(struct fd_context *ctx,
+ struct fd6_pipe_sampler_view *so)
{
- const struct pipe_sampler_view *cso = &so->base;
- struct pipe_resource *prsc = cso->texture;
- struct fd_resource *rsc = fd_resource(prsc);
- enum pipe_format format = cso->format;
- bool ubwc_enabled = false;
- unsigned lvl, layers = 0;
-
- fd6_validate_format(ctx, rsc, cso->format);
-
- if (format == PIPE_FORMAT_X32_S8X24_UINT) {
- rsc = rsc->stencil;
- format = rsc->b.b.format;
- }
-
- so->seqno = ++fd6_context(ctx)->tex_seqno;
- so->ptr1 = rsc;
- so->rsc_seqno = rsc->seqno;
-
- if (cso->target == PIPE_BUFFER) {
- unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
-
- lvl = 0;
- so->texconst1 =
- A6XX_TEX_CONST_1_WIDTH(elements & MASK(15)) |
- A6XX_TEX_CONST_1_HEIGHT(elements >> 15);
- so->texconst2 =
- A6XX_TEX_CONST_2_UNK4 |
- A6XX_TEX_CONST_2_UNK31;
- so->offset1 = cso->u.buf.offset;
- } else {
- unsigned miplevels;
-
- lvl = fd_sampler_first_level(cso);
- miplevels = fd_sampler_last_level(cso) - lvl;
- layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
-
- so->texconst0 |= A6XX_TEX_CONST_0_MIPLVLS(miplevels);
- so->texconst1 =
- A6XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
- A6XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
- so->texconst2 =
- A6XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 6) |
- A6XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
-
- ubwc_enabled = fd_resource_ubwc_enabled(rsc, lvl);
-
- if (rsc->b.b.format == PIPE_FORMAT_R8_G8B8_420_UNORM) {
- struct fd_resource *next = fd_resource(rsc->b.b.next);
-
- /* In case of biplanar R8_G8B8, the UBWC metadata address in
- * dwords 7 and 8, is instead the pointer to the second plane.
- */
- so->ptr2 = next;
- so->texconst6 =
- A6XX_TEX_CONST_6_PLANE_PITCH(fd_resource_pitch(next, lvl));
-
- if (ubwc_enabled) {
- /* Further, if using UBWC with R8_G8B8, we only point to the
- * UBWC header and the color data is expected to follow immediately.
- */
- so->offset1 =
- fd_resource_ubwc_offset(rsc, lvl, cso->u.tex.first_layer);
- so->offset2 =
- fd_resource_ubwc_offset(next, lvl, cso->u.tex.first_layer);
- } else {
- so->offset1 = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
- so->offset2 = fd_resource_offset(next, lvl, cso->u.tex.first_layer);
- }
- } else {
- so->offset1 = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
- if (ubwc_enabled) {
- so->ptr2 = rsc;
- so->offset2 = fd_resource_ubwc_offset(rsc, lvl, cso->u.tex.first_layer);
- }
- }
- }
-
- so->texconst0 |= fd6_tex_const_0(prsc, lvl, cso->format,
- cso->swizzle_r, cso->swizzle_g,
- cso->swizzle_b, cso->swizzle_a);
-
- so->texconst2 |= A6XX_TEX_CONST_2_TYPE(fd6_tex_type(cso->target));
-
- switch (cso->target) {
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_2D:
- so->texconst3 =
- A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
- so->texconst5 =
- A6XX_TEX_CONST_5_DEPTH(1);
- break;
- case PIPE_TEXTURE_1D_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- so->texconst3 =
- A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
- so->texconst5 =
- A6XX_TEX_CONST_5_DEPTH(layers);
- break;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- so->texconst3 =
- A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
- so->texconst5 =
- A6XX_TEX_CONST_5_DEPTH(layers / 6);
- break;
- case PIPE_TEXTURE_3D:
- so->texconst3 =
- A6XX_TEX_CONST_3_MIN_LAYERSZ(
- fd_resource_slice(rsc, prsc->last_level)->size0) |
- A6XX_TEX_CONST_3_ARRAY_PITCH(fd_resource_slice(rsc, lvl)->size0);
- so->texconst5 =
- A6XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl));
- break;
- default:
- break;
- }
-
- if (rsc->layout.tile_all)
- so->texconst3 |= A6XX_TEX_CONST_3_TILE_ALL;
-
- if (ubwc_enabled) {
- uint32_t block_width, block_height;
- fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
-
- so->texconst3 |= A6XX_TEX_CONST_3_FLAG;
- so->texconst9 |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2);
- so->texconst10 |=
- A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(fdl_ubwc_pitch(&rsc->layout, lvl)) |
- A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(u_minify(prsc->width0, lvl), block_width))) |
- A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(u_minify(prsc->height0, lvl), block_height)));
- }
+ const struct pipe_sampler_view *cso = &so->base;
+ struct pipe_resource *prsc = cso->texture;
+ struct fd_resource *rsc = fd_resource(prsc);
+ enum pipe_format format = cso->format;
+ bool ubwc_enabled = false;
+ unsigned lvl, layers = 0;
+
+ fd6_validate_format(ctx, rsc, cso->format);
+
+ if (format == PIPE_FORMAT_X32_S8X24_UINT) {
+ rsc = rsc->stencil;
+ format = rsc->b.b.format;
+ }
+
+ so->seqno = ++fd6_context(ctx)->tex_seqno;
+ so->ptr1 = rsc;
+ so->rsc_seqno = rsc->seqno;
+
+ if (cso->target == PIPE_BUFFER) {
+ unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
+
+ lvl = 0;
+ so->texconst1 = A6XX_TEX_CONST_1_WIDTH(elements & MASK(15)) |
+ A6XX_TEX_CONST_1_HEIGHT(elements >> 15);
+ so->texconst2 = A6XX_TEX_CONST_2_UNK4 | A6XX_TEX_CONST_2_UNK31;
+ so->offset1 = cso->u.buf.offset;
+ } else {
+ unsigned miplevels;
+
+ lvl = fd_sampler_first_level(cso);
+ miplevels = fd_sampler_last_level(cso) - lvl;
+ layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
+
+ so->texconst0 |= A6XX_TEX_CONST_0_MIPLVLS(miplevels);
+ so->texconst1 = A6XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+ A6XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+ so->texconst2 = A6XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 6) |
+ A6XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
+
+ ubwc_enabled = fd_resource_ubwc_enabled(rsc, lvl);
+
+ if (rsc->b.b.format == PIPE_FORMAT_R8_G8B8_420_UNORM) {
+ struct fd_resource *next = fd_resource(rsc->b.b.next);
+
+ /* In case of biplanar R8_G8B8, the UBWC metadata address in
+ * dwords 7 and 8, is instead the pointer to the second plane.
+ */
+ so->ptr2 = next;
+ so->texconst6 =
+ A6XX_TEX_CONST_6_PLANE_PITCH(fd_resource_pitch(next, lvl));
+
+ if (ubwc_enabled) {
+ /* Further, if using UBWC with R8_G8B8, we only point to the
+ * UBWC header and the color data is expected to follow immediately.
+ */
+ so->offset1 =
+ fd_resource_ubwc_offset(rsc, lvl, cso->u.tex.first_layer);
+ so->offset2 =
+ fd_resource_ubwc_offset(next, lvl, cso->u.tex.first_layer);
+ } else {
+ so->offset1 = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
+ so->offset2 = fd_resource_offset(next, lvl, cso->u.tex.first_layer);
+ }
+ } else {
+ so->offset1 = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
+ if (ubwc_enabled) {
+ so->ptr2 = rsc;
+ so->offset2 =
+ fd_resource_ubwc_offset(rsc, lvl, cso->u.tex.first_layer);
+ }
+ }
+ }
+
+ so->texconst0 |=
+ fd6_tex_const_0(prsc, lvl, cso->format, cso->swizzle_r, cso->swizzle_g,
+ cso->swizzle_b, cso->swizzle_a);
+
+ so->texconst2 |= A6XX_TEX_CONST_2_TYPE(fd6_tex_type(cso->target));
+
+ switch (cso->target) {
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_2D:
+ so->texconst3 = A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
+ so->texconst5 = A6XX_TEX_CONST_5_DEPTH(1);
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ so->texconst3 = A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
+ so->texconst5 = A6XX_TEX_CONST_5_DEPTH(layers);
+ break;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ so->texconst3 = A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
+ so->texconst5 = A6XX_TEX_CONST_5_DEPTH(layers / 6);
+ break;
+ case PIPE_TEXTURE_3D:
+ so->texconst3 =
+ A6XX_TEX_CONST_3_MIN_LAYERSZ(
+ fd_resource_slice(rsc, prsc->last_level)->size0) |
+ A6XX_TEX_CONST_3_ARRAY_PITCH(fd_resource_slice(rsc, lvl)->size0);
+ so->texconst5 = A6XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl));
+ break;
+ default:
+ break;
+ }
+
+ if (rsc->layout.tile_all)
+ so->texconst3 |= A6XX_TEX_CONST_3_TILE_ALL;
+
+ if (ubwc_enabled) {
+ uint32_t block_width, block_height;
+ fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
+
+ so->texconst3 |= A6XX_TEX_CONST_3_FLAG;
+ so->texconst9 |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(
+ rsc->layout.ubwc_layer_size >> 2);
+ so->texconst10 |=
+ A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(
+ fdl_ubwc_pitch(&rsc->layout, lvl)) |
+ A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(
+ DIV_ROUND_UP(u_minify(prsc->width0, lvl), block_width))) |
+ A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(
+ DIV_ROUND_UP(u_minify(prsc->height0, lvl), block_height)));
+ }
}
/* NOTE this can be called in either driver thread or frontend thread
*/
static void
fd6_sampler_view_destroy(struct pipe_context *pctx,
- struct pipe_sampler_view *_view)
+ struct pipe_sampler_view *_view)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd6_context *fd6_ctx = fd6_context(ctx);
- struct fd6_pipe_sampler_view *view = fd6_pipe_sampler_view(_view);
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct fd6_pipe_sampler_view *view = fd6_pipe_sampler_view(_view);
- fd_screen_lock(ctx->screen);
+ fd_screen_lock(ctx->screen);
- hash_table_foreach(fd6_ctx->tex_cache, entry) {
- struct fd6_texture_state *state = entry->data;
+ hash_table_foreach(fd6_ctx->tex_cache, entry)
+ {
+ struct fd6_texture_state *state = entry->data;
- for (unsigned i = 0; i < ARRAY_SIZE(state->key.view); i++) {
- if (view->seqno == state->key.view[i].seqno) {
- remove_tex_entry(fd6_ctx, entry);
- break;
- }
- }
- }
+ for (unsigned i = 0; i < ARRAY_SIZE(state->key.view); i++) {
+ if (view->seqno == state->key.view[i].seqno) {
+ remove_tex_entry(fd6_ctx, entry);
+ break;
+ }
+ }
+ }
- fd_screen_unlock(ctx->screen);
+ fd_screen_unlock(ctx->screen);
- pipe_resource_reference(&view->base.texture, NULL);
+ pipe_resource_reference(&view->base.texture, NULL);
- free(view);
+ free(view);
}
-
static uint32_t
key_hash(const void *_key)
{
- const struct fd6_texture_key *key = _key;
- return XXH32(key, sizeof(*key), 0);
+ const struct fd6_texture_key *key = _key;
+ return XXH32(key, sizeof(*key), 0);
}
static bool
key_equals(const void *_a, const void *_b)
{
- const struct fd6_texture_key *a = _a;
- const struct fd6_texture_key *b = _b;
- return memcmp(a, b, sizeof(struct fd6_texture_key)) == 0;
+ const struct fd6_texture_key *a = _a;
+ const struct fd6_texture_key *b = _b;
+ return memcmp(a, b, sizeof(struct fd6_texture_key)) == 0;
}
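/* Hashing and comparing the key as raw bytes (XXH32 + memcmp) is only sound
 * because fd6_texture_state() below memsets the key to zero before filling in
 * the fields, so struct padding cannot differ between two logically equal
 * keys.  A minimal illustration of the rule (example_key/init_key are
 * hypothetical names):
 */
#include <stdint.h>
#include <string.h>

struct example_key {
   uint8_t a; /* padding typically follows this member... */
   uint32_t b;
};

static void
init_key(struct example_key *key, uint8_t a, uint32_t b)
{
   memset(key, 0, sizeof(*key)); /* zero the padding before byte-wise use */
   key->a = a;
   key->b = b;
}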
struct fd6_texture_state *
fd6_texture_state(struct fd_context *ctx, enum pipe_shader_type type,
- struct fd_texture_stateobj *tex)
+ struct fd_texture_stateobj *tex)
{
- struct fd6_context *fd6_ctx = fd6_context(ctx);
- struct fd6_texture_state *state = NULL;
- struct fd6_texture_key key;
- bool needs_border = false;
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct fd6_texture_state *state = NULL;
+ struct fd6_texture_key key;
+ bool needs_border = false;
- memset(&key, 0, sizeof(key));
+ memset(&key, 0, sizeof(key));
- for (unsigned i = 0; i < tex->num_textures; i++) {
- if (!tex->textures[i])
- continue;
+ for (unsigned i = 0; i < tex->num_textures; i++) {
+ if (!tex->textures[i])
+ continue;
- struct fd6_pipe_sampler_view *view =
- fd6_pipe_sampler_view(tex->textures[i]);
+ struct fd6_pipe_sampler_view *view =
+ fd6_pipe_sampler_view(tex->textures[i]);
- /* NOTE that if the backing rsc was uncompressed between the
- * time that the CSO was originally created and now, the rsc
- * seqno would have changed, so we don't have to worry about
- * getting a bogus cache hit.
- */
- key.view[i].rsc_seqno = fd_resource(view->base.texture)->seqno;
- key.view[i].seqno = view->seqno;
- }
+ /* NOTE that if the backing rsc was uncompressed between the
+ * time that the CSO was originally created and now, the rsc
+ * seqno would have changed, so we don't have to worry about
+ * getting a bogus cache hit.
+ */
+ key.view[i].rsc_seqno = fd_resource(view->base.texture)->seqno;
+ key.view[i].seqno = view->seqno;
+ }
- for (unsigned i = 0; i < tex->num_samplers; i++) {
- if (!tex->samplers[i])
- continue;
+ for (unsigned i = 0; i < tex->num_samplers; i++) {
+ if (!tex->samplers[i])
+ continue;
- struct fd6_sampler_stateobj *sampler =
- fd6_sampler_stateobj(tex->samplers[i]);
+ struct fd6_sampler_stateobj *sampler =
+ fd6_sampler_stateobj(tex->samplers[i]);
- key.samp[i].seqno = sampler->seqno;
+ key.samp[i].seqno = sampler->seqno;
- needs_border |= sampler->needs_border;
- }
+ needs_border |= sampler->needs_border;
+ }
- key.type = type;
- key.bcolor_offset = fd6_border_color_offset(ctx, type, tex);
+ key.type = type;
+ key.bcolor_offset = fd6_border_color_offset(ctx, type, tex);
- uint32_t hash = key_hash(&key);
- fd_screen_lock(ctx->screen);
- struct hash_entry *entry =
- _mesa_hash_table_search_pre_hashed(fd6_ctx->tex_cache, hash, &key);
+ uint32_t hash = key_hash(&key);
+ fd_screen_lock(ctx->screen);
+ struct hash_entry *entry =
+ _mesa_hash_table_search_pre_hashed(fd6_ctx->tex_cache, hash, &key);
- if (entry) {
- fd6_texture_state_reference(&state, entry->data);
- goto out_unlock;
- }
+ if (entry) {
+ fd6_texture_state_reference(&state, entry->data);
+ goto out_unlock;
+ }
- state = CALLOC_STRUCT(fd6_texture_state);
+ state = CALLOC_STRUCT(fd6_texture_state);
- /* NOTE: one ref for tex_cache, and second ref for returned state: */
- pipe_reference_init(&state->reference, 2);
- state->key = key;
- state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
- state->needs_border = needs_border;
+ /* NOTE: one ref for tex_cache, and second ref for returned state: */
+ pipe_reference_init(&state->reference, 2);
+ state->key = key;
+ state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
+ state->needs_border = needs_border;
- fd6_emit_textures(ctx, state->stateobj, type, tex, key.bcolor_offset, NULL);
+ fd6_emit_textures(ctx, state->stateobj, type, tex, key.bcolor_offset, NULL);
- /* NOTE: uses copy of key in state obj, because pointer passed by caller
- * is probably on the stack
- */
- _mesa_hash_table_insert_pre_hashed(fd6_ctx->tex_cache, hash,
- &state->key, state);
+ /* NOTE: uses copy of key in state obj, because pointer passed by caller
+ * is probably on the stack
+ */
+ _mesa_hash_table_insert_pre_hashed(fd6_ctx->tex_cache, hash, &state->key,
+ state);
out_unlock:
- fd_screen_unlock(ctx->screen);
- return state;
+ fd_screen_unlock(ctx->screen);
+ return state;
}
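/* Ownership note for the function above: the state object is created with a
 * refcount of two -- one reference owned by the tex_cache hash table, one
 * handed back to the caller.  A hypothetical caller-side sketch using only
 * the APIs shown in this file:
 */
static void
use_texture_state(struct fd_context *ctx, enum pipe_shader_type type,
                  struct fd_texture_stateobj *tex)
{
   struct fd6_texture_state *state = fd6_texture_state(ctx, type, tex);

   /* ... emit state->stateobj into the command stream ... */

   fd6_texture_state_reference(&state, NULL); /* drop the caller's reference */
}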
void
-__fd6_texture_state_describe(char* buf, const struct fd6_texture_state *tex)
+__fd6_texture_state_describe(char *buf, const struct fd6_texture_state *tex)
{
- sprintf(buf, "fd6_texture_state<%p>", tex);
+ sprintf(buf, "fd6_texture_state<%p>", tex);
}
void
__fd6_texture_state_destroy(struct fd6_texture_state *state)
{
- fd_ringbuffer_del(state->stateobj);
- free(state);
+ fd_ringbuffer_del(state->stateobj);
+ free(state);
}
static void
-fd6_rebind_resource(struct fd_context *ctx, struct fd_resource *rsc)
- assert_dt
+fd6_rebind_resource(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
{
- fd_screen_assert_locked(ctx->screen);
+ fd_screen_assert_locked(ctx->screen);
- if (!(rsc->dirty & FD_DIRTY_TEX))
- return;
+ if (!(rsc->dirty & FD_DIRTY_TEX))
+ return;
- struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
- hash_table_foreach (fd6_ctx->tex_cache, entry) {
- struct fd6_texture_state *state = entry->data;
+ hash_table_foreach(fd6_ctx->tex_cache, entry)
+ {
+ struct fd6_texture_state *state = entry->data;
- for (unsigned i = 0; i < ARRAY_SIZE(state->key.view); i++) {
- if (rsc->seqno == state->key.view[i].rsc_seqno) {
- remove_tex_entry(fd6_ctx, entry);
- break;
- }
- }
- }
+ for (unsigned i = 0; i < ARRAY_SIZE(state->key.view); i++) {
+ if (rsc->seqno == state->key.view[i].rsc_seqno) {
+ remove_tex_entry(fd6_ctx, entry);
+ break;
+ }
+ }
+ }
}
void
-fd6_texture_init(struct pipe_context *pctx)
- disable_thread_safety_analysis
+fd6_texture_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
- pctx->create_sampler_state = fd6_sampler_state_create;
- pctx->delete_sampler_state = fd6_sampler_state_delete;
- pctx->bind_sampler_states = fd_sampler_states_bind;
+ pctx->create_sampler_state = fd6_sampler_state_create;
+ pctx->delete_sampler_state = fd6_sampler_state_delete;
+ pctx->bind_sampler_states = fd_sampler_states_bind;
- pctx->create_sampler_view = fd6_sampler_view_create;
- pctx->sampler_view_destroy = fd6_sampler_view_destroy;
- pctx->set_sampler_views = fd6_set_sampler_views;
+ pctx->create_sampler_view = fd6_sampler_view_create;
+ pctx->sampler_view_destroy = fd6_sampler_view_destroy;
+ pctx->set_sampler_views = fd6_set_sampler_views;
- ctx->rebind_resource = fd6_rebind_resource;
+ ctx->rebind_resource = fd6_rebind_resource;
- fd6_ctx->tex_cache = _mesa_hash_table_create(NULL, key_hash, key_equals);
+ fd6_ctx->tex_cache = _mesa_hash_table_create(NULL, key_hash, key_equals);
}
void
fd6_texture_fini(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
- fd_screen_lock(ctx->screen);
+ fd_screen_lock(ctx->screen);
- hash_table_foreach(fd6_ctx->tex_cache, entry) {
- remove_tex_entry(fd6_ctx, entry);
- }
+ hash_table_foreach(fd6_ctx->tex_cache, entry)
+ {
+ remove_tex_entry(fd6_ctx, entry);
+ }
- fd_screen_unlock(ctx->screen);
+ fd_screen_unlock(ctx->screen);
- ralloc_free(fd6_ctx->tex_cache);
+ ralloc_free(fd6_ctx->tex_cache);
}
#include "pipe/p_context.h"
-#include "freedreno_texture.h"
#include "freedreno_resource.h"
+#include "freedreno_texture.h"
#include "fd6_context.h"
#include "fd6_format.h"
struct fd6_sampler_stateobj {
- struct pipe_sampler_state base;
- uint32_t texsamp0, texsamp1, texsamp2, texsamp3;
- bool needs_border;
- uint16_t seqno;
+ struct pipe_sampler_state base;
+ uint32_t texsamp0, texsamp1, texsamp2, texsamp3;
+ bool needs_border;
+ uint16_t seqno;
};
static inline struct fd6_sampler_stateobj *
fd6_sampler_stateobj(struct pipe_sampler_state *samp)
{
- return (struct fd6_sampler_stateobj *)samp;
+ return (struct fd6_sampler_stateobj *)samp;
}
struct fd6_pipe_sampler_view {
- struct pipe_sampler_view base;
- uint32_t texconst0, texconst1, texconst2, texconst3, texconst5;
- uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11;
- uint32_t offset1, offset2;
- struct fd_resource *ptr1, *ptr2;
- uint16_t seqno;
-
- /* For detecting when a resource has transitioned from UBWC compressed
- * to uncompressed, which means the sampler state needs to be updated
- */
- uint16_t rsc_seqno;
-
- bool needs_validate;
+ struct pipe_sampler_view base;
+ uint32_t texconst0, texconst1, texconst2, texconst3, texconst5;
+ uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11;
+ uint32_t offset1, offset2;
+ struct fd_resource *ptr1, *ptr2;
+ uint16_t seqno;
+
+ /* For detecting when a resource has transitioned from UBWC compressed
+ * to uncompressed, which means the sampler state needs to be updated
+ */
+ uint16_t rsc_seqno;
+
+ bool needs_validate;
};
static inline struct fd6_pipe_sampler_view *
fd6_pipe_sampler_view(struct pipe_sampler_view *pview)
{
- return (struct fd6_pipe_sampler_view *)pview;
+ return (struct fd6_pipe_sampler_view *)pview;
}
-void fd6_sampler_view_update(struct fd_context *ctx, struct fd6_pipe_sampler_view *so) assert_dt;
+void fd6_sampler_view_update(struct fd_context *ctx,
+ struct fd6_pipe_sampler_view *so) assert_dt;
void fd6_texture_init(struct pipe_context *pctx);
void fd6_texture_fini(struct pipe_context *pctx);
static inline enum a6xx_tex_type
fd6_tex_type(unsigned target)
{
- switch (target) {
- default:
- assert(0);
- case PIPE_BUFFER:
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return A6XX_TEX_1D;
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_2D_ARRAY:
- return A6XX_TEX_2D;
- case PIPE_TEXTURE_3D:
- return A6XX_TEX_3D;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return A6XX_TEX_CUBE;
- }
+ switch (target) {
+ default:
+ assert(0);
+ case PIPE_BUFFER:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return A6XX_TEX_1D;
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return A6XX_TEX_2D;
+ case PIPE_TEXTURE_3D:
+ return A6XX_TEX_3D;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return A6XX_TEX_CUBE;
+ }
}
static inline unsigned
fd6_border_color_offset(struct fd_context *ctx, enum pipe_shader_type type,
- struct fd_texture_stateobj *tex)
- assert_dt
+ struct fd_texture_stateobj *tex) assert_dt
{
- /* Currently we put the FS border-color state after VS. Possibly
- * we could swap the order.
- *
- * This will need update for HS/DS/GS
- */
- if (type != PIPE_SHADER_FRAGMENT)
- return 0;
+ /* Currently we put the FS border-color state after VS. Possibly
+ * we could swap the order.
+ *
+ * This will need updating for HS/DS/GS
+ */
+ if (type != PIPE_SHADER_FRAGMENT)
+ return 0;
- unsigned needs_border = false;
+ unsigned needs_border = false;
- for (unsigned i = 0; i < tex->num_samplers; i++) {
- if (!tex->samplers[i])
- continue;
+ for (unsigned i = 0; i < tex->num_samplers; i++) {
+ if (!tex->samplers[i])
+ continue;
- struct fd6_sampler_stateobj *sampler =
- fd6_sampler_stateobj(tex->samplers[i]);
+ struct fd6_sampler_stateobj *sampler =
+ fd6_sampler_stateobj(tex->samplers[i]);
- needs_border |= sampler->needs_border;
- }
+ needs_border |= sampler->needs_border;
+ }
- if (!needs_border)
- return 0;
+ if (!needs_border)
+ return 0;
- return ctx->tex[PIPE_SHADER_VERTEX].num_samplers;
+ return ctx->tex[PIPE_SHADER_VERTEX].num_samplers;
}
/*
*/
struct fd6_texture_key {
- struct {
- /* We need to track the seqno of the rsc as well as of the
- * sampler view, because resource shadowing/etc can result
- * that the underlying bo changes (which means the previous
- * state was no longer valid.
- */
- uint16_t rsc_seqno;
- uint16_t seqno;
- } view[16];
- struct {
- uint16_t seqno;
- } samp[16];
- uint8_t type;
- uint8_t bcolor_offset;
+ struct {
+ /* We need to track the seqno of the rsc as well as of the
+ * sampler view, because resource shadowing/etc can result
+ * in the underlying bo changing (which means the previous
+ * state is no longer valid).
+ */
+ uint16_t rsc_seqno;
+ uint16_t seqno;
+ } view[16];
+ struct {
+ uint16_t seqno;
+ } samp[16];
+ uint8_t type;
+ uint8_t bcolor_offset;
};
struct fd6_texture_state {
- struct pipe_reference reference;
- struct fd6_texture_key key;
- struct fd_ringbuffer *stateobj;
- bool needs_border;
+ struct pipe_reference reference;
+ struct fd6_texture_key key;
+ struct fd_ringbuffer *stateobj;
+ bool needs_border;
};
-struct fd6_texture_state * fd6_texture_state(struct fd_context *ctx,
- enum pipe_shader_type type, struct fd_texture_stateobj *tex) assert_dt;
+struct fd6_texture_state *
+fd6_texture_state(struct fd_context *ctx, enum pipe_shader_type type,
+ struct fd_texture_stateobj *tex) assert_dt;
/* not called directly: */
-void __fd6_texture_state_describe(char* buf, const struct fd6_texture_state *tex);
+void __fd6_texture_state_describe(char *buf,
+ const struct fd6_texture_state *tex);
void __fd6_texture_state_destroy(struct fd6_texture_state *tex);
static inline void
-fd6_texture_state_reference(struct fd6_texture_state **ptr, struct fd6_texture_state *tex)
+fd6_texture_state_reference(struct fd6_texture_state **ptr,
+ struct fd6_texture_state *tex)
{
- struct fd6_texture_state *old_tex = *ptr;
+ struct fd6_texture_state *old_tex = *ptr;
- if (pipe_reference_described(&(*ptr)->reference, &tex->reference,
- (debug_reference_descriptor)__fd6_texture_state_describe))
- __fd6_texture_state_destroy(old_tex);
+ if (pipe_reference_described(
+ &(*ptr)->reference, &tex->reference,
+ (debug_reference_descriptor)__fd6_texture_state_describe))
+ __fd6_texture_state_destroy(old_tex);
- *ptr = tex;
+ *ptr = tex;
}
#endif /* FD6_TEXTURE_H_ */
* SOFTWARE.
*/
-
#include "pipe/p_state.h"
#include "util/u_prim.h"
*/
enum {
- byte = 8,
- dword = 4 * byte,
+ byte = 8,
+ dword = 4 * byte,
} bits_per;
/**
static unsigned
number_size_bits(unsigned nr)
{
- unsigned n = util_last_bit(nr);
- assert(n); /* encoding 0 is not possible */
- return n + (n - 1);
+ unsigned n = util_last_bit(nr);
+ assert(n); /* encoding 0 is not possible */
+ return n + (n - 1);
}
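/* The cost computed above is n value bits plus an (n - 1)-bit length prefix,
 * i.e. 2n - 1 bits total (the length of an Elias-gamma-style code).  Worked
 * examples:
 *
 *   nr = 1  -> n = 1 -> 1 + 0 = 1 bit
 *   nr = 5  -> n = 3 -> 3 + 2 = 5 bits
 *   nr = 32 -> n = 6 -> 6 + 5 = 11 bits
 */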
/**
static unsigned
bitfield_size_bits(unsigned n)
{
- return n + 1; /* worst case is always 1 + nr of bits */
+ return n + 1; /* worst case is always 1 + nr of bits */
}
static unsigned
prim_count(const struct pipe_draw_info *info,
const struct pipe_draw_start_count *draw)
{
- /* PIPE_PRIM_MAX used internally for RECTLIST blits on 3d pipe: */
- unsigned vtx_per_prim = (info->mode == PIPE_PRIM_MAX) ? 2 :
- u_vertices_per_prim(info->mode);
- return MAX2(1, (draw->count * info->instance_count) / vtx_per_prim);
+ /* PIPE_PRIM_MAX used internally for RECTLIST blits on 3d pipe: */
+ unsigned vtx_per_prim =
+ (info->mode == PIPE_PRIM_MAX) ? 2 : u_vertices_per_prim(info->mode);
+ return MAX2(1, (draw->count * info->instance_count) / vtx_per_prim);
}
/**
* The primitive stream uses a run-length encoding, where each packet contains a
- * bitfield of bins covered and then the number of primitives which have the same
- * bitfield. Each packet consists of the following, in order:
+ * bitfield of bins covered and then the number of primitives which have the
+ * same bitfield. Each packet consists of the following, in order:
*
* - The (compressed) bitfield of bins covered
* - The number of primitives with this bitset
* - Checksum
*
- * The worst case would be that each primitive has a different bitmask. In practice,
- * assuming ever other primitive has a different bitmask still gets us conservatively
- * large primitive stream sizes. (Ie. 10x what is needed, vs. 20x)
+ * The worst case would be that each primitive has a different bitmask. In
+ * practice, assuming every other primitive has a different bitmask still
+ * gets us conservatively large primitive stream sizes. (I.e. 10x what is
+ * needed, vs. 20x)
*
* https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#primitive-streams
*/
static unsigned
primitive_stream_size_bits(const struct pipe_draw_info *info,
- const struct pipe_draw_start_count *draw, unsigned num_bins)
+ const struct pipe_draw_start_count *draw,
+ unsigned num_bins)
{
- unsigned num_prims = prim_count(info, draw);
- unsigned nbits =
- (bitfield_size_bits(num_bins) /* bitfield of bins covered */
- + number_size_bits(1) /* number of primitives with this bitset */
- + 1 /* checksum */
- ) * DIV_ROUND_UP(num_prims, 2);
- return align(nbits, dword);
+ unsigned num_prims = prim_count(info, draw);
+ unsigned nbits =
+ (bitfield_size_bits(num_bins) /* bitfield of bins covered */
+ + number_size_bits(1) /* number of primitives with this bitset */
+ + 1 /* checksum */
+ ) *
+ DIV_ROUND_UP(num_prims, 2);
+ return align(nbits, dword);
}
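/* A concrete upper bound from the formula above, for illustrative values of
 * num_bins = 8 and a draw that works out to 100 primitives:
 *
 *   per-packet bits:  bitfield_size_bits(8) = 9
 *                   + number_size_bits(1)   = 1
 *                   + 1 checksum bit        -> 11 bits
 *   packets:          DIV_ROUND_UP(100, 2)  = 50
 *   total:            align(11 * 50, dword) = align(550, 32) = 576 bits
 */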
/**
*/
static unsigned
draw_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins,
- unsigned prim_strm_bits)
+ unsigned prim_strm_bits)
{
- unsigned ndwords = prim_strm_bits / dword;
- return (bitfield_size_bits(num_bins) /* bitfield of bins */
- + 1 /* last-instance-bit */
- + number_size_bits(ndwords) /* size of corresponding prim strm */
- + 1 /* checksum */
- ) * MAX2(1, info->instance_count);
+ unsigned ndwords = prim_strm_bits / dword;
+ return (bitfield_size_bits(num_bins) /* bitfield of bins */
+ + 1 /* last-instance-bit */
+ + number_size_bits(ndwords) /* size of corresponding prim strm */
+ + 1 /* checksum */
+ ) *
+ MAX2(1, info->instance_count);
}
void
fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info,
const struct pipe_draw_start_count *draw)
{
- if (!batch->num_bins_per_pipe) {
- batch->num_bins_per_pipe = fd_gmem_estimate_bins_per_pipe(batch);
-
- /* This is a convenient spot to add the size of the final draw-
- * stream packet:
- *
- * If there are N bins, the final packet, after all the draws are
- * done, consists of a 1 followed by N + 17 0's, plus a final 1.
- * This uses the otherwise-unused pattern of a non-empty bitfield
- * (initial 1) that is nontheless empty (has all 0's)
- */
- unsigned final_pkt_sz = 1 + batch->num_bins_per_pipe + 17 + 1;
- batch->prim_strm_bits = align(final_pkt_sz, dword);
- }
-
- unsigned prim_strm_bits =
- primitive_stream_size_bits(info, draw, batch->num_bins_per_pipe);
- unsigned draw_strm_bits =
- draw_stream_size_bits(info, batch->num_bins_per_pipe, prim_strm_bits);
+ if (!batch->num_bins_per_pipe) {
+ batch->num_bins_per_pipe = fd_gmem_estimate_bins_per_pipe(batch);
+
+ /* This is a convenient spot to add the size of the final draw-
+ * stream packet:
+ *
+ * If there are N bins, the final packet, after all the draws are
+ * done, consists of a 1 followed by N + 17 0's, plus a final 1.
+ * This uses the otherwise-unused pattern of a non-empty bitfield
+ * (initial 1) that is nonetheless empty (has all 0's)
+ */
+ unsigned final_pkt_sz = 1 + batch->num_bins_per_pipe + 17 + 1;
+ batch->prim_strm_bits = align(final_pkt_sz, dword);
+ }
+
+ unsigned prim_strm_bits =
+ primitive_stream_size_bits(info, draw, batch->num_bins_per_pipe);
+ unsigned draw_strm_bits =
+ draw_stream_size_bits(info, batch->num_bins_per_pipe, prim_strm_bits);
#if 0
printf("vsc: prim_strm_bits=%d, draw_strm_bits=%d, nb=%u, ic=%u, c=%u, pc=%u (%s)\n",
u_prim_name(info->mode));
#endif
- batch->prim_strm_bits += prim_strm_bits;
- batch->draw_strm_bits += draw_strm_bits;
+ batch->prim_strm_bits += prim_strm_bits;
+ batch->draw_strm_bits += draw_strm_bits;
}
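/* Worked example of the fixed final-packet cost added above, for an
 * illustrative batch with num_bins_per_pipe = 4:
 *
 *   final_pkt_sz = 1 + 4 + 17 + 1 = 23 bits
 *   prim_strm_bits starts at align(23, dword) = 32 bits
 */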
-
#ifndef FD6_VSC_H_
#define FD6_VSC_H_
-void fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info,
+void fd6_vsc_update_sizes(struct fd_batch *batch,
+ const struct pipe_draw_info *info,
const struct pipe_draw_start_count *draw);
#endif /* FD6_VSC_H_ */
* Rob Clark <robclark@freedesktop.org>
*/
-
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "fd6_zsa.h"
#include "fd6_context.h"
#include "fd6_format.h"
+#include "fd6_zsa.h"
/* update lrz state based on stencil-test func:
*
*/
static void
update_lrz_stencil(struct fd6_zsa_stateobj *so, enum pipe_compare_func func,
- bool stencil_write)
+ bool stencil_write)
{
- switch (func) {
- case PIPE_FUNC_ALWAYS:
- /* nothing to do for LRZ, but for stencil test when stencil-
- * write is enabled, we need to disable lrz-test, since
- * conceptually stencil test and write happens before depth-
- * test:
- */
- if (stencil_write) {
- so->lrz.enable = false;
- so->lrz.test = false;
- }
- break;
- case PIPE_FUNC_NEVER:
- /* fragment never passes, disable lrz_write for this draw: */
- so->lrz.write = false;
- break;
- default:
- /* whether the fragment passes or not depends on result
- * of stencil test, which we cannot know when doing binning
- * pass:
- */
- so->lrz.write = false;
- /* similarly to the PIPE_FUNC_ALWAY case, if there are side-
- * effects from stencil test we need to disable lrz-test.
- */
- if (stencil_write) {
- so->lrz.enable = false;
- so->lrz.test = false;
- }
- break;
- }
+ switch (func) {
+ case PIPE_FUNC_ALWAYS:
+ /* nothing to do for LRZ, but for stencil test when stencil-
+ * write is enabled, we need to disable lrz-test, since
+ * conceptually stencil test and write happens before depth-
+ * test:
+ */
+ if (stencil_write) {
+ so->lrz.enable = false;
+ so->lrz.test = false;
+ }
+ break;
+ case PIPE_FUNC_NEVER:
+ /* fragment never passes, disable lrz_write for this draw: */
+ so->lrz.write = false;
+ break;
+ default:
+ /* whether the fragment passes or not depends on result
+ * of stencil test, which we cannot know when doing binning
+ * pass:
+ */
+ so->lrz.write = false;
+ * similarly to the PIPE_FUNC_ALWAYS case, if there are side-
+ * effects from stencil test we need to disable lrz-test.
+ */
+ if (stencil_write) {
+ so->lrz.enable = false;
+ so->lrz.test = false;
+ }
+ break;
+ }
}
void *
fd6_zsa_state_create(struct pipe_context *pctx,
- const struct pipe_depth_stencil_alpha_state *cso)
+ const struct pipe_depth_stencil_alpha_state *cso)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd6_zsa_stateobj *so;
-
- so = CALLOC_STRUCT(fd6_zsa_stateobj);
- if (!so)
- return NULL;
-
- so->base = *cso;
-
- so->writes_zs = util_writes_depth_stencil(cso);
-
- so->rb_depth_cntl |=
- A6XX_RB_DEPTH_CNTL_ZFUNC(cso->depth_func); /* maps 1:1 */
-
- if (cso->depth_enabled) {
- so->rb_depth_cntl |=
- A6XX_RB_DEPTH_CNTL_Z_ENABLE |
- A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
-
- so->lrz.test = true;
-
- if (cso->depth_writemask) {
- so->lrz.write = true;
- }
-
- switch (cso->depth_func) {
- case PIPE_FUNC_LESS:
- case PIPE_FUNC_LEQUAL:
- so->lrz.enable = true;
- so->lrz.direction = FD_LRZ_LESS;
- break;
-
- case PIPE_FUNC_GREATER:
- case PIPE_FUNC_GEQUAL:
- so->lrz.enable = true;
- so->lrz.direction = FD_LRZ_GREATER;
- break;
-
- case PIPE_FUNC_NEVER:
- so->lrz.enable = true;
- so->lrz.write = false;
- so->lrz.direction = FD_LRZ_LESS;
- break;
-
- /* TODO revisit these: */
- case PIPE_FUNC_EQUAL:
- case PIPE_FUNC_NOTEQUAL:
- case PIPE_FUNC_ALWAYS:
- so->lrz.write = false;
- so->invalidate_lrz = true;
- break;
- }
- }
-
- if (cso->depth_writemask)
- so->rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
-
- if (cso->stencil[0].enabled) {
- const struct pipe_stencil_state *s = &cso->stencil[0];
-
- /* stencil test happens before depth test, so without performing
- * stencil test we don't really know what the updates to the
- * depth buffer will be.
- */
- update_lrz_stencil(so, s->func, !!s->writemask);
-
- so->rb_stencil_control |=
- A6XX_RB_STENCIL_CONTROL_STENCIL_READ |
- A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
- A6XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
- A6XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
- A6XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
- A6XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
-
- so->rb_stencilmask = A6XX_RB_STENCILMASK_MASK(s->valuemask);
- so->rb_stencilwrmask = A6XX_RB_STENCILWRMASK_WRMASK(s->writemask);
-
- if (cso->stencil[1].enabled) {
- const struct pipe_stencil_state *bs = &cso->stencil[1];
-
- update_lrz_stencil(so, bs->func, !!bs->writemask);
-
- so->rb_stencil_control |=
- A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
- A6XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
- A6XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
- A6XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
- A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
-
- so->rb_stencilmask |= A6XX_RB_STENCILMASK_BFMASK(bs->valuemask);
- so->rb_stencilwrmask |= A6XX_RB_STENCILWRMASK_BFWRMASK(bs->writemask);
- }
- }
-
- if (cso->alpha_enabled) {
- /* Alpha test is functionally a conditional discard, so we can't
- * write LRZ before seeing if we end up discarding or not
- */
- if (cso->alpha_func != PIPE_FUNC_ALWAYS) {
- so->lrz.write = false;
- so->alpha_test = true;
- }
-
- uint32_t ref = cso->alpha_ref_value * 255.0;
- so->rb_alpha_control =
- A6XX_RB_ALPHA_CONTROL_ALPHA_TEST |
- A6XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
- A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
- }
-
- for (int i = 0; i < 4; i++) {
- struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 9 * 4);
-
- OUT_PKT4(ring, REG_A6XX_RB_ALPHA_CONTROL, 1);
- OUT_RING(ring, (i & FD6_ZSA_NO_ALPHA) ?
- so->rb_alpha_control & ~A6XX_RB_ALPHA_CONTROL_ALPHA_TEST :
- so->rb_alpha_control);
-
- OUT_PKT4(ring, REG_A6XX_RB_STENCIL_CONTROL, 1);
- OUT_RING(ring, so->rb_stencil_control);
-
- OUT_PKT4(ring, REG_A6XX_RB_DEPTH_CNTL, 1);
- OUT_RING(ring, so->rb_depth_cntl |
- COND(i & FD6_ZSA_DEPTH_CLAMP, A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE));
-
- OUT_PKT4(ring, REG_A6XX_RB_STENCILMASK, 2);
- OUT_RING(ring, so->rb_stencilmask);
- OUT_RING(ring, so->rb_stencilwrmask);
-
- so->stateobj[i] = ring;
- }
-
- return so;
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd6_zsa_stateobj *so;
+
+ so = CALLOC_STRUCT(fd6_zsa_stateobj);
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ so->writes_zs = util_writes_depth_stencil(cso);
+
+ so->rb_depth_cntl |=
+ A6XX_RB_DEPTH_CNTL_ZFUNC(cso->depth_func); /* maps 1:1 */
+
+ if (cso->depth_enabled) {
+ so->rb_depth_cntl |=
+ A6XX_RB_DEPTH_CNTL_Z_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
+
+ so->lrz.test = true;
+
+ if (cso->depth_writemask) {
+ so->lrz.write = true;
+ }
+
+ switch (cso->depth_func) {
+ case PIPE_FUNC_LESS:
+ case PIPE_FUNC_LEQUAL:
+ so->lrz.enable = true;
+ so->lrz.direction = FD_LRZ_LESS;
+ break;
+
+ case PIPE_FUNC_GREATER:
+ case PIPE_FUNC_GEQUAL:
+ so->lrz.enable = true;
+ so->lrz.direction = FD_LRZ_GREATER;
+ break;
+
+ case PIPE_FUNC_NEVER:
+ so->lrz.enable = true;
+ so->lrz.write = false;
+ so->lrz.direction = FD_LRZ_LESS;
+ break;
+
+ /* TODO revisit these: */
+ case PIPE_FUNC_EQUAL:
+ case PIPE_FUNC_NOTEQUAL:
+ case PIPE_FUNC_ALWAYS:
+ so->lrz.write = false;
+ so->invalidate_lrz = true;
+ break;
+ }
+ }
+
+ if (cso->depth_writemask)
+ so->rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
+
+ if (cso->stencil[0].enabled) {
+ const struct pipe_stencil_state *s = &cso->stencil[0];
+
+ /* stencil test happens before depth test, so without performing
+ * stencil test we don't really know what the updates to the
+ * depth buffer will be.
+ */
+ update_lrz_stencil(so, s->func, !!s->writemask);
+
+ so->rb_stencil_control |=
+ A6XX_RB_STENCIL_CONTROL_STENCIL_READ |
+ A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+ A6XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
+ A6XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
+ A6XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
+ A6XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
+
+ so->rb_stencilmask = A6XX_RB_STENCILMASK_MASK(s->valuemask);
+ so->rb_stencilwrmask = A6XX_RB_STENCILWRMASK_WRMASK(s->writemask);
+
+ if (cso->stencil[1].enabled) {
+ const struct pipe_stencil_state *bs = &cso->stencil[1];
+
+ update_lrz_stencil(so, bs->func, !!bs->writemask);
+
+ so->rb_stencil_control |=
+ A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+ A6XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
+ A6XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
+ A6XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
+ A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
+
+ so->rb_stencilmask |= A6XX_RB_STENCILMASK_BFMASK(bs->valuemask);
+ so->rb_stencilwrmask |= A6XX_RB_STENCILWRMASK_BFWRMASK(bs->writemask);
+ }
+ }
+
+ if (cso->alpha_enabled) {
+ /* Alpha test is functionally a conditional discard, so we can't
+ * write LRZ before seeing if we end up discarding or not
+ */
+ if (cso->alpha_func != PIPE_FUNC_ALWAYS) {
+ so->lrz.write = false;
+ so->alpha_test = true;
+ }
+
+ uint32_t ref = cso->alpha_ref_value * 255.0;
+ so->rb_alpha_control =
+ A6XX_RB_ALPHA_CONTROL_ALPHA_TEST |
+ A6XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
+ A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
+ }
+
+ for (int i = 0; i < 4; i++) {
+ struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 9 * 4);
+
+ OUT_PKT4(ring, REG_A6XX_RB_ALPHA_CONTROL, 1);
+ OUT_RING(ring,
+ (i & FD6_ZSA_NO_ALPHA)
+ ? so->rb_alpha_control & ~A6XX_RB_ALPHA_CONTROL_ALPHA_TEST
+ : so->rb_alpha_control);
+
+ OUT_PKT4(ring, REG_A6XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, so->rb_stencil_control);
+
+ OUT_PKT4(ring, REG_A6XX_RB_DEPTH_CNTL, 1);
+ OUT_RING(ring,
+ so->rb_depth_cntl | COND(i & FD6_ZSA_DEPTH_CLAMP,
+ A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE));
+
+ OUT_PKT4(ring, REG_A6XX_RB_STENCILMASK, 2);
+ OUT_RING(ring, so->rb_stencilmask);
+ OUT_RING(ring, so->rb_stencilwrmask);
+
+ so->stateobj[i] = ring;
+ }
+
+ return so;
}
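The depth-func to LRZ-direction mapping above is the heart of this function; a minimal standalone sketch of the same classification follows, with hypothetical enum names standing in for the PIPE_FUNC_*/FD_LRZ_* values (only the mapping logic mirrors fd6_zsa_state_create()):

#include <stdbool.h>

enum func { F_LESS, F_LEQUAL, F_GREATER, F_GEQUAL, F_NEVER,
            F_EQUAL, F_NOTEQUAL, F_ALWAYS };
enum dir { DIR_LESS, DIR_GREATER, DIR_INVALID };

struct lrz { bool enable, write, invalidate; enum dir direction; };

static struct lrz
classify(enum func f, bool zwrite)
{
   struct lrz l = { .write = zwrite, .direction = DIR_INVALID };
   switch (f) {
   case F_LESS: case F_LEQUAL:
      l.enable = true; l.direction = DIR_LESS; break;
   case F_GREATER: case F_GEQUAL:
      l.enable = true; l.direction = DIR_GREATER; break;
   case F_NEVER:
      /* nothing can pass, so LRZ stays enabled but must not write */
      l.enable = true; l.write = false; l.direction = DIR_LESS; break;
   default:
      /* EQUAL/NOTEQUAL/ALWAYS give no usable ordering: stop writing
       * and invalidate the LRZ buffer */
      l.write = false; l.invalidate = true; break;
   }
   return l;
}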
void
fd6_zsa_state_delete(struct pipe_context *pctx, void *hwcso)
{
- struct fd6_zsa_stateobj *so = hwcso;
+ struct fd6_zsa_stateobj *so = hwcso;
- for (int i = 0; i < ARRAY_SIZE(so->stateobj); i++)
- fd_ringbuffer_del(so->stateobj[i]);
- FREE(hwcso);
+ for (int i = 0; i < ARRAY_SIZE(so->stateobj); i++)
+ fd_ringbuffer_del(so->stateobj[i]);
+ FREE(hwcso);
}
#ifndef FD6_ZSA_H_
#define FD6_ZSA_H_
-
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "freedreno_util.h"
#define FD6_ZSA_DEPTH_CLAMP (1 << 1)
struct fd6_zsa_stateobj {
- struct pipe_depth_stencil_alpha_state base;
+ struct pipe_depth_stencil_alpha_state base;
- uint32_t rb_alpha_control;
- uint32_t rb_depth_cntl;
- uint32_t rb_stencil_control;
- uint32_t rb_stencilmask;
- uint32_t rb_stencilwrmask;
+ uint32_t rb_alpha_control;
+ uint32_t rb_depth_cntl;
+ uint32_t rb_stencil_control;
+ uint32_t rb_stencilmask;
+ uint32_t rb_stencilwrmask;
- struct fd6_lrz_state lrz;
- bool writes_zs; /* writes depth and/or stencil */
- bool invalidate_lrz;
- bool alpha_test;
+ struct fd6_lrz_state lrz;
+ bool writes_zs; /* writes depth and/or stencil */
+ bool invalidate_lrz;
+ bool alpha_test;
- struct fd_ringbuffer *stateobj[4];
+ struct fd_ringbuffer *stateobj[4];
};
static inline struct fd6_zsa_stateobj *
fd6_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
- return (struct fd6_zsa_stateobj *)zsa;
+ return (struct fd6_zsa_stateobj *)zsa;
}
static inline struct fd_ringbuffer *
-fd6_zsa_state(struct fd_context *ctx, bool no_alpha, bool depth_clamp)
- assert_dt
+fd6_zsa_state(struct fd_context *ctx, bool no_alpha, bool depth_clamp) assert_dt
{
- int variant = 0;
- if (no_alpha)
- variant |= FD6_ZSA_NO_ALPHA;
- if (depth_clamp)
- variant |= FD6_ZSA_DEPTH_CLAMP;
- return fd6_zsa_stateobj(ctx->zsa)->stateobj[variant];
+ int variant = 0;
+ if (no_alpha)
+ variant |= FD6_ZSA_NO_ALPHA;
+ if (depth_clamp)
+ variant |= FD6_ZSA_DEPTH_CLAMP;
+ return fd6_zsa_stateobj(ctx->zsa)->stateobj[variant];
}
-void * fd6_zsa_state_create(struct pipe_context *pctx,
- const struct pipe_depth_stencil_alpha_state *cso);
+void *fd6_zsa_state_create(struct pipe_context *pctx,
+ const struct pipe_depth_stencil_alpha_state *cso);
-void fd6_zsa_state_delete(struct pipe_context *pctx,
- void *hwcso);
+void fd6_zsa_state_delete(struct pipe_context *pctx, void *hwcso);
#endif /* FD6_ZSA_H_ */
-
#include "freedreno_batch.h"
#include "freedreno_util.h"
-
/**
* Tracks, for a given batch key (which maps to a FBO/framebuffer state),
*
* ralloc parent is fd_autotune::ht
*/
struct fd_batch_history {
- struct fd_batch_key *key;
+ struct fd_batch_key *key;
- /* Entry in fd_autotune::lru: */
- struct list_head node;
+ /* Entry in fd_autotune::lru: */
+ struct list_head node;
- unsigned num_results;
+ unsigned num_results;
- /**
- * List of recent fd_batch_result's
- */
- struct list_head results;
+ /**
+ * List of recent fd_batch_result's
+ */
+ struct list_head results;
#define MAX_RESULTS 5
};
-
static struct fd_batch_history *
get_history(struct fd_autotune *at, struct fd_batch *batch)
{
- struct fd_batch_history *history;
+ struct fd_batch_history *history;
- if (!batch->key)
- return NULL;
+ if (!batch->key)
+ return NULL;
- struct hash_entry *entry =
- _mesa_hash_table_search_pre_hashed(at->ht, batch->hash, batch->key);
+ struct hash_entry *entry =
+ _mesa_hash_table_search_pre_hashed(at->ht, batch->hash, batch->key);
- if (entry) {
- history = entry->data;
- goto found;
- }
+ if (entry) {
+ history = entry->data;
+ goto found;
+ }
- history = rzalloc_size(at->ht, sizeof(*history));
+ history = rzalloc_size(at->ht, sizeof(*history));
- history->key = fd_batch_key_clone(history, batch->key);
- list_inithead(&history->node);
- list_inithead(&history->results);
+ history->key = fd_batch_key_clone(history, batch->key);
+ list_inithead(&history->node);
+ list_inithead(&history->results);
- /* Note: We cap # of cached GMEM states at 20.. so assuming double-
- * buffering, 40 should be a good place to cap cached autotune state
- */
- if (at->ht->entries >= 40) {
- struct fd_batch_history *last =
- list_last_entry(&at->lru, struct fd_batch_history, node);
- _mesa_hash_table_remove_key(at->ht, last->key);
- list_del(&last->node);
- ralloc_free(last);
- }
+ /* Note: We cap # of cached GMEM states at 20.. so assuming double-
+ * buffering, 40 should be a good place to cap cached autotune state
+ */
+ if (at->ht->entries >= 40) {
+ struct fd_batch_history *last =
+ list_last_entry(&at->lru, struct fd_batch_history, node);
+ _mesa_hash_table_remove_key(at->ht, last->key);
+ list_del(&last->node);
+ ralloc_free(last);
+ }
- _mesa_hash_table_insert_pre_hashed(at->ht, batch->hash, history->key, history);
+ _mesa_hash_table_insert_pre_hashed(at->ht, batch->hash, history->key,
+ history);
found:
- /* Move to the head of the LRU: */
- list_delinit(&history->node);
- list_add(&history->node, &at->lru);
+ /* Move to the head of the LRU: */
+ list_delinit(&history->node);
+ list_add(&history->node, &at->lru);
- return history;
+ return history;
}
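The eviction above is a plain capped LRU; a self-contained sketch of the same pattern using only the C standard library (the hash-table removal is elided, and free() stands in for ralloc_free()):

#include <stdlib.h>

struct node { struct node *prev, *next; };

/* Same shape as the 40-entry cap in get_history(): once the table is
 * full, unlink and free the tail of the circular LRU list. */
static void
evict_if_full(struct node *lru, unsigned entries, unsigned cap)
{
   if (entries < cap)
      return;
   struct node *last = lru->prev; /* tail == head->prev in a circular list */
   last->prev->next = last->next;
   last->next->prev = last->prev;
   free(last);
}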
static void
result_destructor(void *r)
{
- struct fd_batch_result *result = r;
+ struct fd_batch_result *result = r;
- /* Just in case we manage to somehow still be on the pending_results list: */
- list_del(&result->node);
+ /* Just in case we manage to somehow still be on the pending_results list: */
+ list_del(&result->node);
}
static struct fd_batch_result *
get_result(struct fd_autotune *at, struct fd_batch_history *history)
{
- struct fd_batch_result *result = rzalloc_size(history, sizeof(*result));
+ struct fd_batch_result *result = rzalloc_size(history, sizeof(*result));
- result->fence = ++at->fence_counter; /* pre-increment so zero isn't valid fence */
- result->idx = at->idx_counter++;
+ result->fence =
+ ++at->fence_counter; /* pre-increment so zero isn't valid fence */
+ result->idx = at->idx_counter++;
- if (at->idx_counter >= ARRAY_SIZE(at->results->result))
- at->idx_counter = 0;
+ if (at->idx_counter >= ARRAY_SIZE(at->results->result))
+ at->idx_counter = 0;
- result->history = history;
- list_addtail(&result->node, &at->pending_results);
+ result->history = history;
+ list_addtail(&result->node, &at->pending_results);
- ralloc_set_destructor(result, result_destructor);
+ ralloc_set_destructor(result, result_destructor);
- return result;
+ return result;
}
static void
process_results(struct fd_autotune *at)
{
- uint32_t current_fence = at->results->fence;
-
- list_for_each_entry_safe (struct fd_batch_result, result, &at->pending_results, node) {
- if (result->fence > current_fence)
- break;
-
- struct fd_batch_history *history = result->history;
-
- result->samples_passed = at->results->result[result->idx].samples_end -
- at->results->result[result->idx].samples_start;
-
- list_delinit(&result->node);
- list_add(&result->node, &history->results);
-
- if (history->num_results < MAX_RESULTS) {
- history->num_results++;
- } else {
- /* Once above a limit, start popping old results off the
- * tail of the list:
- */
- struct fd_batch_result *old_result =
- list_last_entry(&history->results, struct fd_batch_result, node);
- list_delinit(&old_result->node);
- ralloc_free(old_result);
- }
- }
+ uint32_t current_fence = at->results->fence;
+
+ list_for_each_entry_safe (struct fd_batch_result, result,
+ &at->pending_results, node) {
+ if (result->fence > current_fence)
+ break;
+
+ struct fd_batch_history *history = result->history;
+
+ result->samples_passed = at->results->result[result->idx].samples_end -
+ at->results->result[result->idx].samples_start;
+
+ list_delinit(&result->node);
+ list_add(&result->node, &history->results);
+
+ if (history->num_results < MAX_RESULTS) {
+ history->num_results++;
+ } else {
+ /* Once above a limit, start popping old results off the
+ * tail of the list:
+ */
+ struct fd_batch_result *old_result =
+ list_last_entry(&history->results, struct fd_batch_result, node);
+ list_delinit(&old_result->node);
+ ralloc_free(old_result);
+ }
+ }
}
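process_results() depends on two invariants: result slots are handed out round-robin over the 127-entry array, and fences increase monotonically with zero reserved, so the pending list can be drained front-to-back until the first unsignaled entry. A compact standalone rendering of both:

#include <stdint.h>
#include <stdbool.h>

#define NUM_SLOTS 127 /* matches fd_autotune_results::result[127] */

/* Round-robin slot allocation, as in get_result(): */
static uint32_t
next_slot(uint32_t *idx_counter)
{
   uint32_t idx = (*idx_counter)++;
   if (*idx_counter >= NUM_SLOTS)
      *idx_counter = 0;
   return idx;
}

/* A pending result is complete once the GPU-written fence has caught
 * up to the fence assigned at submit time (zero is never valid, since
 * get_result() pre-increments): */
static bool
result_ready(uint32_t result_fence, uint32_t gpu_fence)
{
   return result_fence <= gpu_fence;
}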
static bool
fallback_use_bypass(struct fd_batch *batch)
{
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- /* Fallback logic if we have no historical data about the rendertarget: */
- if (batch->cleared || batch->gmem_reason ||
- ((batch->num_draws > 5) && !batch->blit) ||
- (pfb->samples > 1)) {
- return false;
- }
+ /* Fallback logic if we have no historical data about the rendertarget: */
+ if (batch->cleared || batch->gmem_reason ||
+ ((batch->num_draws > 5) && !batch->blit) || (pfb->samples > 1)) {
+ return false;
+ }
- return true;
+ return true;
}
/**
bool
fd_autotune_use_bypass(struct fd_autotune *at, struct fd_batch *batch)
{
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
-
- process_results(at);
-
- /* Only enable on gen's that opt-in (and actually have sample-passed
- * collection wired up:
- */
- if (!batch->ctx->screen->gmem_reason_mask)
- return fallback_use_bypass(batch);
-
- if (batch->gmem_reason & ~batch->ctx->screen->gmem_reason_mask)
- return fallback_use_bypass(batch);
-
- for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
- /* If ms-rtt is involved, force GMEM, as we don't currently
- * implement a temporary render target that we can MSAA resolve
- * from
- */
- if (pfb->cbufs[i] && pfb->cbufs[i]->nr_samples)
- return fallback_use_bypass(batch);
- }
-
- struct fd_batch_history *history = get_history(at, batch);
- if (!history)
- return fallback_use_bypass(batch);
-
- batch->autotune_result = get_result(at, history);
- batch->autotune_result->cost = batch->cost;
-
- bool use_bypass = fallback_use_bypass(batch);
-
- if (use_bypass)
- return true;
-
- if (history->num_results > 0) {
- uint32_t total_samples = 0;
-
- // TODO we should account for clears somehow
- // TODO should we try to notice if there is a drastic change from
- // frame to frame?
- list_for_each_entry (struct fd_batch_result, result, &history->results, node) {
- total_samples += result->samples_passed;
- }
-
- float avg_samples = (float)total_samples / (float)history->num_results;
-
- /* Low sample count could mean there was only a clear.. or there was
- * a clear plus draws that touch no or few samples
- */
- if (avg_samples < 500.0)
- return true;
-
- /* Cost-per-sample is an estimate for the average number of reads+
- * writes for a given passed sample.
- */
- float sample_cost = batch->cost;
- sample_cost /= batch->num_draws;
-
- float total_draw_cost = (avg_samples * sample_cost) / batch->num_draws;
- DBG("%08x:%u\ttotal_samples=%u, avg_samples=%f, sample_cost=%f, total_draw_cost=%f\n",
- batch->hash, batch->num_draws, total_samples, avg_samples, sample_cost, total_draw_cost);
-
- if (total_draw_cost < 3000.0)
- return true;
- }
-
- return use_bypass;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
+ process_results(at);
+
+   /* Only enable on gens that opt in (and actually have sample-passed
+    * collection wired up):
+    */
+ if (!batch->ctx->screen->gmem_reason_mask)
+ return fallback_use_bypass(batch);
+
+ if (batch->gmem_reason & ~batch->ctx->screen->gmem_reason_mask)
+ return fallback_use_bypass(batch);
+
+ for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
+ /* If ms-rtt is involved, force GMEM, as we don't currently
+ * implement a temporary render target that we can MSAA resolve
+ * from
+ */
+ if (pfb->cbufs[i] && pfb->cbufs[i]->nr_samples)
+ return fallback_use_bypass(batch);
+ }
+
+ struct fd_batch_history *history = get_history(at, batch);
+ if (!history)
+ return fallback_use_bypass(batch);
+
+ batch->autotune_result = get_result(at, history);
+ batch->autotune_result->cost = batch->cost;
+
+ bool use_bypass = fallback_use_bypass(batch);
+
+ if (use_bypass)
+ return true;
+
+ if (history->num_results > 0) {
+ uint32_t total_samples = 0;
+
+ // TODO we should account for clears somehow
+ // TODO should we try to notice if there is a drastic change from
+ // frame to frame?
+ list_for_each_entry (struct fd_batch_result, result, &history->results,
+ node) {
+ total_samples += result->samples_passed;
+ }
+
+ float avg_samples = (float)total_samples / (float)history->num_results;
+
+ /* Low sample count could mean there was only a clear.. or there was
+ * a clear plus draws that touch no or few samples
+ */
+ if (avg_samples < 500.0)
+ return true;
+
+ /* Cost-per-sample is an estimate for the average number of reads+
+ * writes for a given passed sample.
+ */
+ float sample_cost = batch->cost;
+ sample_cost /= batch->num_draws;
+
+ float total_draw_cost = (avg_samples * sample_cost) / batch->num_draws;
+ DBG("%08x:%u\ttotal_samples=%u, avg_samples=%f, sample_cost=%f, "
+ "total_draw_cost=%f\n",
+ batch->hash, batch->num_draws, total_samples, avg_samples,
+ sample_cost, total_draw_cost);
+
+ if (total_draw_cost < 3000.0)
+ return true;
+ }
+
+ return use_bypass;
}
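Plugging the thresholds into standalone code makes the heuristic concrete; this is the same arithmetic as the history branch above, not a replacement for the driver entry point:

#include <stdbool.h>
#include <stdint.h>

/* Below ~500 average samples-passed the batch is treated as clear-ish
 * and sysmem bypass wins outright; otherwise bypass is used only while
 * the estimated per-draw memory traffic stays under 3000. */
static bool
use_bypass(uint32_t total_samples, unsigned num_results,
           uint32_t cost, unsigned num_draws)
{
   float avg_samples = (float)total_samples / (float)num_results;
   if (avg_samples < 500.0f)
      return true;
   float sample_cost = (float)cost / (float)num_draws;
   float total_draw_cost = (avg_samples * sample_cost) / (float)num_draws;
   return total_draw_cost < 3000.0f;
}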
void
fd_autotune_init(struct fd_autotune *at, struct fd_device *dev)
{
- at->ht = _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
- list_inithead(&at->lru);
+ at->ht =
+ _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
+ list_inithead(&at->lru);
- at->results_mem = fd_bo_new(dev, sizeof(struct fd_autotune_results),
- DRM_FREEDRENO_GEM_TYPE_KMEM, "autotune");
- at->results = fd_bo_map(at->results_mem);
+ at->results_mem = fd_bo_new(dev, sizeof(struct fd_autotune_results),
+ DRM_FREEDRENO_GEM_TYPE_KMEM, "autotune");
+ at->results = fd_bo_map(at->results_mem);
- list_inithead(&at->pending_results);
+ list_inithead(&at->pending_results);
}
void
fd_autotune_fini(struct fd_autotune *at)
{
- _mesa_hash_table_destroy(at->ht, NULL);
- fd_bo_del(at->results_mem);
+ _mesa_hash_table_destroy(at->ht, NULL);
+ fd_bo_del(at->results_mem);
}
*/
struct fd_autotune {
- /**
- * Cache to map batch->key (also used for batch-cache) to historical
- * information about rendering to that particular render target.
- */
- struct hash_table *ht;
-
- /**
- * List of recently used historical results (to age out old results)
- */
- struct list_head lru;
-
- /**
- * GPU buffer used to communicate back results to the CPU
- */
- struct fd_bo *results_mem;
- struct fd_autotune_results *results;
-
- /**
- * List of per-batch results that we are waiting for the GPU to finish
- * with before reading back the results.
- */
- struct list_head pending_results;
-
- uint32_t fence_counter;
- uint32_t idx_counter;
+ /**
+ * Cache to map batch->key (also used for batch-cache) to historical
+ * information about rendering to that particular render target.
+ */
+ struct hash_table *ht;
+
+ /**
+ * List of recently used historical results (to age out old results)
+ */
+ struct list_head lru;
+
+ /**
+ * GPU buffer used to communicate back results to the CPU
+ */
+ struct fd_bo *results_mem;
+ struct fd_autotune_results *results;
+
+ /**
+ * List of per-batch results that we are waiting for the GPU to finish
+ * with before reading back the results.
+ */
+ struct list_head pending_results;
+
+ uint32_t fence_counter;
+ uint32_t idx_counter;
};
/**
*/
struct fd_autotune_results {
- /**
- * The GPU writes back a "fence" seqno value from the cmdstream after
- * it finishes writing it's result slot, so that the CPU knows when
- * results are valid
- */
- uint32_t fence;
-
- uint32_t __pad0;
- uint64_t __pad1;
-
- /**
- * From the cmdstream, the captured samples-passed values are recorded
- * at the start and end of the batch.
- *
- * Note that we do the math on the CPU to avoid a WFI. But pre-emption
- * may force us to revisit that.
- */
- struct {
- uint64_t samples_start;
- uint64_t __pad0;
- uint64_t samples_end;
- uint64_t __pad1;
- } result[127];
+ /**
+ * The GPU writes back a "fence" seqno value from the cmdstream after
+    * it finishes writing its result slot, so that the CPU knows when
+ * results are valid
+ */
+ uint32_t fence;
+
+ uint32_t __pad0;
+ uint64_t __pad1;
+
+ /**
+ * From the cmdstream, the captured samples-passed values are recorded
+ * at the start and end of the batch.
+ *
+ * Note that we do the math on the CPU to avoid a WFI. But pre-emption
+ * may force us to revisit that.
+ */
+ struct {
+ uint64_t samples_start;
+ uint64_t __pad0;
+ uint64_t samples_end;
+ uint64_t __pad1;
+ } result[127];
};
-#define offset(base, ptr) ((uint8_t *)(ptr) - (uint8_t *)(base))
-#define results_ptr(at, member) \
- (at)->results_mem, offset((at)->results, &(at)->results->member), 0, 0
+#define offset(base, ptr) ((uint8_t *)(ptr) - (uint8_t *)(base))
+#define results_ptr(at, member) \
+ (at)->results_mem, offset((at)->results, &(at)->results->member), 0, 0
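offset()/results_ptr() just turn a member pointer into a byte offset inside the BO-backed results struct; a tiny self-contained illustration of the arithmetic (struct demo is hypothetical):

#include <stdint.h>
#include <stdio.h>

#define offset(base, ptr) ((uint8_t *)(ptr) - (uint8_t *)(base))

struct demo { uint32_t fence; uint32_t pad; uint64_t slots[4]; };

int
main(void)
{
   struct demo d;
   /* same value as offsetof(struct demo, slots[2]): */
   printf("%td\n", offset(&d, &d.slots[2]));
   return 0;
}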
struct fd_batch_history;
*/
struct fd_batch_result {
- /**
- * The index/slot in fd_autotune_results::result[] to write start/end
- * counter to
- */
- unsigned idx;
-
- /**
- * Fence value to write back to fd_autotune_results::fence after both
- * start/end values written
- */
- uint32_t fence;
-
- /*
- * Below here, only used internally within autotune
- */
- struct fd_batch_history *history;
- struct list_head node;
- uint32_t cost;
- uint64_t samples_passed;
+ /**
+ * The index/slot in fd_autotune_results::result[] to write start/end
+ * counter to
+ */
+ unsigned idx;
+
+ /**
+ * Fence value to write back to fd_autotune_results::fence after both
+ * start/end values written
+ */
+ uint32_t fence;
+
+ /*
+ * Below here, only used internally within autotune
+ */
+ struct fd_batch_history *history;
+ struct list_head node;
+ uint32_t cost;
+ uint64_t samples_passed;
};
void fd_autotune_init(struct fd_autotune *at, struct fd_device *dev);
void fd_autotune_fini(struct fd_autotune *at);
struct fd_batch;
-bool fd_autotune_use_bypass(struct fd_autotune *at, struct fd_batch *batch) assert_dt;
+bool fd_autotune_use_bypass(struct fd_autotune *at,
+ struct fd_batch *batch) assert_dt;
#endif /* FREEDRENO_AUTOTUNE_H */
* Rob Clark <robclark@freedesktop.org>
*/
+#include "util/hash_table.h"
#include "util/list.h"
#include "util/set.h"
-#include "util/hash_table.h"
#include "util/u_string.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "freedreno_fence.h"
-#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
+#include "freedreno_resource.h"
static struct fd_ringbuffer *
alloc_ring(struct fd_batch *batch, unsigned sz, enum fd_ringbuffer_flags flags)
{
- struct fd_context *ctx = batch->ctx;
-
- /* if kernel is too old to support unlimited # of cmd buffers, we
- * have no option but to allocate large worst-case sizes so that
- * we don't need to grow the ringbuffer. Performance is likely to
- * suffer, but there is no good alternative.
- *
- * Otherwise if supported, allocate a growable ring with initial
- * size of zero.
- */
- if ((fd_device_version(ctx->screen->dev) >= FD_VERSION_UNLIMITED_CMDS) &&
- !FD_DBG(NOGROW)) {
- flags |= FD_RINGBUFFER_GROWABLE;
- sz = 0;
- }
-
- return fd_submit_new_ringbuffer(batch->submit, sz, flags);
+ struct fd_context *ctx = batch->ctx;
+
+ /* if kernel is too old to support unlimited # of cmd buffers, we
+ * have no option but to allocate large worst-case sizes so that
+ * we don't need to grow the ringbuffer. Performance is likely to
+ * suffer, but there is no good alternative.
+ *
+ * Otherwise if supported, allocate a growable ring with initial
+ * size of zero.
+ */
+ if ((fd_device_version(ctx->screen->dev) >= FD_VERSION_UNLIMITED_CMDS) &&
+ !FD_DBG(NOGROW)) {
+ flags |= FD_RINGBUFFER_GROWABLE;
+ sz = 0;
+ }
+
+ return fd_submit_new_ringbuffer(batch->submit, sz, flags);
}
static void
batch_init(struct fd_batch *batch)
{
- struct fd_context *ctx = batch->ctx;
-
- batch->submit = fd_submit_new(ctx->pipe);
- if (batch->nondraw) {
- batch->gmem = alloc_ring(batch, 0x1000, FD_RINGBUFFER_PRIMARY);
- batch->draw = alloc_ring(batch, 0x100000, 0);
- } else {
- batch->gmem = alloc_ring(batch, 0x100000, FD_RINGBUFFER_PRIMARY);
- batch->draw = alloc_ring(batch, 0x100000, 0);
-
- /* a6xx+ re-uses draw rb for both draw and binning pass: */
- if (ctx->screen->gpu_id < 600) {
- batch->binning = alloc_ring(batch, 0x100000, 0);
- }
- }
-
- batch->in_fence_fd = -1;
- batch->fence = fd_fence_create(batch);
-
- batch->cleared = 0;
- batch->fast_cleared = 0;
- batch->invalidated = 0;
- batch->restore = batch->resolve = 0;
- batch->needs_flush = false;
- batch->flushed = false;
- batch->gmem_reason = 0;
- batch->num_draws = 0;
- batch->num_vertices = 0;
- batch->num_bins_per_pipe = 0;
- batch->prim_strm_bits = 0;
- batch->draw_strm_bits = 0;
-
- fd_reset_wfi(batch);
-
- util_dynarray_init(&batch->draw_patches, NULL);
- util_dynarray_init(&batch->fb_read_patches, NULL);
-
- if (is_a2xx(ctx->screen)) {
- util_dynarray_init(&batch->shader_patches, NULL);
- util_dynarray_init(&batch->gmem_patches, NULL);
- }
-
- if (is_a3xx(ctx->screen))
- util_dynarray_init(&batch->rbrc_patches, NULL);
-
- assert(batch->resources->entries == 0);
-
- util_dynarray_init(&batch->samples, NULL);
-
- u_trace_init(&batch->trace, &ctx->trace_context);
- batch->last_timestamp_cmd = NULL;
+ struct fd_context *ctx = batch->ctx;
+
+ batch->submit = fd_submit_new(ctx->pipe);
+ if (batch->nondraw) {
+ batch->gmem = alloc_ring(batch, 0x1000, FD_RINGBUFFER_PRIMARY);
+ batch->draw = alloc_ring(batch, 0x100000, 0);
+ } else {
+ batch->gmem = alloc_ring(batch, 0x100000, FD_RINGBUFFER_PRIMARY);
+ batch->draw = alloc_ring(batch, 0x100000, 0);
+
+ /* a6xx+ re-uses draw rb for both draw and binning pass: */
+ if (ctx->screen->gpu_id < 600) {
+ batch->binning = alloc_ring(batch, 0x100000, 0);
+ }
+ }
+
+ batch->in_fence_fd = -1;
+ batch->fence = fd_fence_create(batch);
+
+ batch->cleared = 0;
+ batch->fast_cleared = 0;
+ batch->invalidated = 0;
+ batch->restore = batch->resolve = 0;
+ batch->needs_flush = false;
+ batch->flushed = false;
+ batch->gmem_reason = 0;
+ batch->num_draws = 0;
+ batch->num_vertices = 0;
+ batch->num_bins_per_pipe = 0;
+ batch->prim_strm_bits = 0;
+ batch->draw_strm_bits = 0;
+
+ fd_reset_wfi(batch);
+
+ util_dynarray_init(&batch->draw_patches, NULL);
+ util_dynarray_init(&batch->fb_read_patches, NULL);
+
+ if (is_a2xx(ctx->screen)) {
+ util_dynarray_init(&batch->shader_patches, NULL);
+ util_dynarray_init(&batch->gmem_patches, NULL);
+ }
+
+ if (is_a3xx(ctx->screen))
+ util_dynarray_init(&batch->rbrc_patches, NULL);
+
+ assert(batch->resources->entries == 0);
+
+ util_dynarray_init(&batch->samples, NULL);
+
+ u_trace_init(&batch->trace, &ctx->trace_context);
+ batch->last_timestamp_cmd = NULL;
}
struct fd_batch *
fd_batch_create(struct fd_context *ctx, bool nondraw)
{
- struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
+ struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
- if (!batch)
- return NULL;
+ if (!batch)
+ return NULL;
- DBG("%p", batch);
+ DBG("%p", batch);
- pipe_reference_init(&batch->reference, 1);
- batch->ctx = ctx;
- batch->nondraw = nondraw;
+ pipe_reference_init(&batch->reference, 1);
+ batch->ctx = ctx;
+ batch->nondraw = nondraw;
- simple_mtx_init(&batch->submit_lock, mtx_plain);
+ simple_mtx_init(&batch->submit_lock, mtx_plain);
- batch->resources = _mesa_set_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+ batch->resources =
+ _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
- batch_init(batch);
+ batch_init(batch);
- fd_screen_assert_locked(ctx->screen);
- if (BATCH_DEBUG) {
- _mesa_set_add(ctx->screen->live_batches, batch);
- }
+ fd_screen_assert_locked(ctx->screen);
+ if (BATCH_DEBUG) {
+ _mesa_set_add(ctx->screen->live_batches, batch);
+ }
- return batch;
+ return batch;
}
static void
cleanup_submit(struct fd_batch *batch)
{
- if (!batch->submit)
- return;
-
- fd_ringbuffer_del(batch->draw);
- fd_ringbuffer_del(batch->gmem);
-
- if (batch->binning) {
- fd_ringbuffer_del(batch->binning);
- batch->binning = NULL;
- }
-
- if (batch->prologue) {
- fd_ringbuffer_del(batch->prologue);
- batch->prologue = NULL;
- }
-
- if (batch->epilogue) {
- fd_ringbuffer_del(batch->epilogue);
- batch->epilogue = NULL;
- }
-
- if (batch->tile_setup) {
- fd_ringbuffer_del(batch->tile_setup);
- batch->tile_setup = NULL;
- }
-
- if (batch->tile_fini) {
- fd_ringbuffer_del(batch->tile_fini);
- batch->tile_fini = NULL;
- }
-
- if (batch->tessellation) {
- fd_bo_del(batch->tessfactor_bo);
- fd_bo_del(batch->tessparam_bo);
- fd_ringbuffer_del(batch->tess_addrs_constobj);
- }
-
- fd_submit_del(batch->submit);
- batch->submit = NULL;
+ if (!batch->submit)
+ return;
+
+ fd_ringbuffer_del(batch->draw);
+ fd_ringbuffer_del(batch->gmem);
+
+ if (batch->binning) {
+ fd_ringbuffer_del(batch->binning);
+ batch->binning = NULL;
+ }
+
+ if (batch->prologue) {
+ fd_ringbuffer_del(batch->prologue);
+ batch->prologue = NULL;
+ }
+
+ if (batch->epilogue) {
+ fd_ringbuffer_del(batch->epilogue);
+ batch->epilogue = NULL;
+ }
+
+ if (batch->tile_setup) {
+ fd_ringbuffer_del(batch->tile_setup);
+ batch->tile_setup = NULL;
+ }
+
+ if (batch->tile_fini) {
+ fd_ringbuffer_del(batch->tile_fini);
+ batch->tile_fini = NULL;
+ }
+
+ if (batch->tessellation) {
+ fd_bo_del(batch->tessfactor_bo);
+ fd_bo_del(batch->tessparam_bo);
+ fd_ringbuffer_del(batch->tess_addrs_constobj);
+ }
+
+ fd_submit_del(batch->submit);
+ batch->submit = NULL;
}
static void
batch_fini(struct fd_batch *batch)
{
- DBG("%p", batch);
+ DBG("%p", batch);
- pipe_resource_reference(&batch->query_buf, NULL);
+ pipe_resource_reference(&batch->query_buf, NULL);
- if (batch->in_fence_fd != -1)
- close(batch->in_fence_fd);
+ if (batch->in_fence_fd != -1)
+ close(batch->in_fence_fd);
- /* in case batch wasn't flushed but fence was created: */
- fd_fence_populate(batch->fence, 0, -1);
+ /* in case batch wasn't flushed but fence was created: */
+ fd_fence_populate(batch->fence, 0, -1);
- fd_fence_ref(&batch->fence, NULL);
+ fd_fence_ref(&batch->fence, NULL);
- cleanup_submit(batch);
+ cleanup_submit(batch);
- util_dynarray_fini(&batch->draw_patches);
- util_dynarray_fini(&batch->fb_read_patches);
+ util_dynarray_fini(&batch->draw_patches);
+ util_dynarray_fini(&batch->fb_read_patches);
- if (is_a2xx(batch->ctx->screen)) {
- util_dynarray_fini(&batch->shader_patches);
- util_dynarray_fini(&batch->gmem_patches);
- }
+ if (is_a2xx(batch->ctx->screen)) {
+ util_dynarray_fini(&batch->shader_patches);
+ util_dynarray_fini(&batch->gmem_patches);
+ }
- if (is_a3xx(batch->ctx->screen))
- util_dynarray_fini(&batch->rbrc_patches);
+ if (is_a3xx(batch->ctx->screen))
+ util_dynarray_fini(&batch->rbrc_patches);
- while (batch->samples.size > 0) {
- struct fd_hw_sample *samp =
- util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
- fd_hw_sample_reference(batch->ctx, &samp, NULL);
- }
- util_dynarray_fini(&batch->samples);
+ while (batch->samples.size > 0) {
+ struct fd_hw_sample *samp =
+ util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
+ fd_hw_sample_reference(batch->ctx, &samp, NULL);
+ }
+ util_dynarray_fini(&batch->samples);
- u_trace_fini(&batch->trace);
+ u_trace_fini(&batch->trace);
}
static void
-batch_flush_dependencies(struct fd_batch *batch)
- assert_dt
+batch_flush_dependencies(struct fd_batch *batch) assert_dt
{
- struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
- struct fd_batch *dep;
+ struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
+ struct fd_batch *dep;
- foreach_batch (dep, cache, batch->dependents_mask) {
- fd_batch_flush(dep);
- fd_batch_reference(&dep, NULL);
- }
+ foreach_batch(dep, cache, batch->dependents_mask)
+ {
+ fd_batch_flush(dep);
+ fd_batch_reference(&dep, NULL);
+ }
- batch->dependents_mask = 0;
+ batch->dependents_mask = 0;
}
static void
batch_reset_dependencies(struct fd_batch *batch)
{
- struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
- struct fd_batch *dep;
+ struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
+ struct fd_batch *dep;
- foreach_batch (dep, cache, batch->dependents_mask) {
- fd_batch_reference(&dep, NULL);
- }
+ foreach_batch(dep, cache, batch->dependents_mask)
+ {
+ fd_batch_reference(&dep, NULL);
+ }
- batch->dependents_mask = 0;
+ batch->dependents_mask = 0;
}
static void
batch_reset_resources_locked(struct fd_batch *batch)
{
- fd_screen_assert_locked(batch->ctx->screen);
-
- set_foreach(batch->resources, entry) {
- struct fd_resource *rsc = (struct fd_resource *)entry->key;
- _mesa_set_remove(batch->resources, entry);
- debug_assert(rsc->track->batch_mask & (1 << batch->idx));
- rsc->track->batch_mask &= ~(1 << batch->idx);
- if (rsc->track->write_batch == batch)
- fd_batch_reference_locked(&rsc->track->write_batch, NULL);
- }
+ fd_screen_assert_locked(batch->ctx->screen);
+
+ set_foreach (batch->resources, entry) {
+ struct fd_resource *rsc = (struct fd_resource *)entry->key;
+ _mesa_set_remove(batch->resources, entry);
+ debug_assert(rsc->track->batch_mask & (1 << batch->idx));
+ rsc->track->batch_mask &= ~(1 << batch->idx);
+ if (rsc->track->write_batch == batch)
+ fd_batch_reference_locked(&rsc->track->write_batch, NULL);
+ }
}
static void
-batch_reset_resources(struct fd_batch *batch)
- assert_dt
+batch_reset_resources(struct fd_batch *batch) assert_dt
{
- fd_screen_lock(batch->ctx->screen);
- batch_reset_resources_locked(batch);
- fd_screen_unlock(batch->ctx->screen);
+ fd_screen_lock(batch->ctx->screen);
+ batch_reset_resources_locked(batch);
+ fd_screen_unlock(batch->ctx->screen);
}
static void
-batch_reset(struct fd_batch *batch)
- assert_dt
+batch_reset(struct fd_batch *batch) assert_dt
{
- DBG("%p", batch);
+ DBG("%p", batch);
- batch_reset_dependencies(batch);
- batch_reset_resources(batch);
+ batch_reset_dependencies(batch);
+ batch_reset_resources(batch);
- batch_fini(batch);
- batch_init(batch);
+ batch_fini(batch);
+ batch_init(batch);
}
void
fd_batch_reset(struct fd_batch *batch)
{
- if (batch->needs_flush)
- batch_reset(batch);
+ if (batch->needs_flush)
+ batch_reset(batch);
}
void
__fd_batch_destroy(struct fd_batch *batch)
{
- struct fd_context *ctx = batch->ctx;
+ struct fd_context *ctx = batch->ctx;
- DBG("%p", batch);
+ DBG("%p", batch);
- fd_screen_assert_locked(batch->ctx->screen);
+ fd_screen_assert_locked(batch->ctx->screen);
- if (BATCH_DEBUG) {
- _mesa_set_remove_key(ctx->screen->live_batches, batch);
- }
+ if (BATCH_DEBUG) {
+ _mesa_set_remove_key(ctx->screen->live_batches, batch);
+ }
- fd_bc_invalidate_batch(batch, true);
+ fd_bc_invalidate_batch(batch, true);
- batch_reset_resources_locked(batch);
- debug_assert(batch->resources->entries == 0);
- _mesa_set_destroy(batch->resources, NULL);
+ batch_reset_resources_locked(batch);
+ debug_assert(batch->resources->entries == 0);
+ _mesa_set_destroy(batch->resources, NULL);
- fd_screen_unlock(ctx->screen);
- batch_reset_dependencies(batch);
- debug_assert(batch->dependents_mask == 0);
+ fd_screen_unlock(ctx->screen);
+ batch_reset_dependencies(batch);
+ debug_assert(batch->dependents_mask == 0);
- util_copy_framebuffer_state(&batch->framebuffer, NULL);
- batch_fini(batch);
+ util_copy_framebuffer_state(&batch->framebuffer, NULL);
+ batch_fini(batch);
- simple_mtx_destroy(&batch->submit_lock);
+ simple_mtx_destroy(&batch->submit_lock);
- free(batch);
- fd_screen_lock(ctx->screen);
+ free(batch);
+ fd_screen_lock(ctx->screen);
}
void
-__fd_batch_describe(char* buf, const struct fd_batch *batch)
+__fd_batch_describe(char *buf, const struct fd_batch *batch)
{
- sprintf(buf, "fd_batch<%u>", batch->seqno);
+ sprintf(buf, "fd_batch<%u>", batch->seqno);
}
/* Get per-batch prologue */
struct fd_ringbuffer *
fd_batch_get_prologue(struct fd_batch *batch)
{
- if (!batch->prologue)
- batch->prologue = alloc_ring(batch, 0x1000, 0);
- return batch->prologue;
+ if (!batch->prologue)
+ batch->prologue = alloc_ring(batch, 0x1000, 0);
+ return batch->prologue;
}
/* Only called from fd_batch_flush() */
static void
-batch_flush(struct fd_batch *batch)
- assert_dt
+batch_flush(struct fd_batch *batch) assert_dt
{
- DBG("%p: needs_flush=%d", batch, batch->needs_flush);
+ DBG("%p: needs_flush=%d", batch, batch->needs_flush);
- if (!fd_batch_lock_submit(batch))
- return;
+ if (!fd_batch_lock_submit(batch))
+ return;
- batch->needs_flush = false;
+ batch->needs_flush = false;
- /* close out the draw cmds by making sure any active queries are
- * paused:
- */
- fd_batch_finish_queries(batch);
+ /* close out the draw cmds by making sure any active queries are
+ * paused:
+ */
+ fd_batch_finish_queries(batch);
- batch_flush_dependencies(batch);
+ batch_flush_dependencies(batch);
- batch->flushed = true;
- if (batch == batch->ctx->batch)
- fd_batch_reference(&batch->ctx->batch, NULL);
+ batch->flushed = true;
+ if (batch == batch->ctx->batch)
+ fd_batch_reference(&batch->ctx->batch, NULL);
- fd_fence_ref(&batch->ctx->last_fence, batch->fence);
+ fd_fence_ref(&batch->ctx->last_fence, batch->fence);
- fd_gmem_render_tiles(batch);
- batch_reset_resources(batch);
+ fd_gmem_render_tiles(batch);
+ batch_reset_resources(batch);
- debug_assert(batch->reference.count > 0);
+ debug_assert(batch->reference.count > 0);
- fd_screen_lock(batch->ctx->screen);
- /* NOTE: remove=false removes the patch from the hashtable, so future
- * lookups won't cache-hit a flushed batch, but leaves the weak reference
- * to the batch to avoid having multiple batches with same batch->idx, as
- * that causes all sorts of hilarity.
- */
- fd_bc_invalidate_batch(batch, false);
- fd_screen_unlock(batch->ctx->screen);
- cleanup_submit(batch);
- fd_batch_unlock_submit(batch);
+ fd_screen_lock(batch->ctx->screen);
+   /* NOTE: remove=false removes the batch from the hashtable, so future
+ * lookups won't cache-hit a flushed batch, but leaves the weak reference
+ * to the batch to avoid having multiple batches with same batch->idx, as
+ * that causes all sorts of hilarity.
+ */
+ fd_bc_invalidate_batch(batch, false);
+ fd_screen_unlock(batch->ctx->screen);
+ cleanup_submit(batch);
+ fd_batch_unlock_submit(batch);
}
/* NOTE: could drop the last ref to batch
void
fd_batch_flush(struct fd_batch *batch)
{
- struct fd_batch *tmp = NULL;
-
- /* NOTE: we need to hold an extra ref across the body of flush,
- * since the last ref to this batch could be dropped when cleaning
- * up used_resources
- */
- fd_batch_reference(&tmp, batch);
- batch_flush(tmp);
- fd_batch_reference(&tmp, NULL);
+ struct fd_batch *tmp = NULL;
+
+ /* NOTE: we need to hold an extra ref across the body of flush,
+ * since the last ref to this batch could be dropped when cleaning
+ * up used_resources
+ */
+ fd_batch_reference(&tmp, batch);
+ batch_flush(tmp);
+ fd_batch_reference(&tmp, NULL);
}
/* find a batches dependents mask, including recursive dependencies: */
static uint32_t
recursive_dependents_mask(struct fd_batch *batch)
{
- struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
- struct fd_batch *dep;
- uint32_t dependents_mask = batch->dependents_mask;
+ struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
+ struct fd_batch *dep;
+ uint32_t dependents_mask = batch->dependents_mask;
- foreach_batch(dep, cache, batch->dependents_mask)
- dependents_mask |= recursive_dependents_mask(dep);
+ foreach_batch(dep, cache, batch->dependents_mask) dependents_mask |=
+ recursive_dependents_mask(dep);
- return dependents_mask;
+ return dependents_mask;
}
void
fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
{
- fd_screen_assert_locked(batch->ctx->screen);
+ fd_screen_assert_locked(batch->ctx->screen);
- if (batch->dependents_mask & (1 << dep->idx))
- return;
+ if (batch->dependents_mask & (1 << dep->idx))
+ return;
- /* a loop should not be possible */
- debug_assert(!((1 << batch->idx) & recursive_dependents_mask(dep)));
+ /* a loop should not be possible */
+ debug_assert(!((1 << batch->idx) & recursive_dependents_mask(dep)));
- struct fd_batch *other = NULL;
- fd_batch_reference_locked(&other, dep);
- batch->dependents_mask |= (1 << dep->idx);
- DBG("%p: added dependency on %p", batch, dep);
+ struct fd_batch *other = NULL;
+ fd_batch_reference_locked(&other, dep);
+ batch->dependents_mask |= (1 << dep->idx);
+ DBG("%p: added dependency on %p", batch, dep);
}
static void
-flush_write_batch(struct fd_resource *rsc)
- assert_dt
+flush_write_batch(struct fd_resource *rsc) assert_dt
{
- struct fd_batch *b = NULL;
- fd_batch_reference_locked(&b, rsc->track->write_batch);
+ struct fd_batch *b = NULL;
+ fd_batch_reference_locked(&b, rsc->track->write_batch);
- fd_screen_unlock(b->ctx->screen);
- fd_batch_flush(b);
- fd_screen_lock(b->ctx->screen);
+ fd_screen_unlock(b->ctx->screen);
+ fd_batch_flush(b);
+ fd_screen_lock(b->ctx->screen);
- fd_batch_reference_locked(&b, NULL);
+ fd_batch_reference_locked(&b, NULL);
}
static void
fd_batch_add_resource(struct fd_batch *batch, struct fd_resource *rsc)
{
- if (likely(fd_batch_references_resource(batch, rsc))) {
- debug_assert(_mesa_set_search(batch->resources, rsc));
- return;
- }
+ if (likely(fd_batch_references_resource(batch, rsc))) {
+ debug_assert(_mesa_set_search(batch->resources, rsc));
+ return;
+ }
- debug_assert(!_mesa_set_search(batch->resources, rsc));
+ debug_assert(!_mesa_set_search(batch->resources, rsc));
- _mesa_set_add(batch->resources, rsc);
- rsc->track->batch_mask |= (1 << batch->idx);
+ _mesa_set_add(batch->resources, rsc);
+ rsc->track->batch_mask |= (1 << batch->idx);
}
void
fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
{
- fd_screen_assert_locked(batch->ctx->screen);
-
- DBG("%p: write %p", batch, rsc);
-
- /* Must do this before the early out, so we unset a previous resource
- * invalidate (which may have left the write_batch state in place).
- */
- rsc->valid = true;
-
- if (rsc->track->write_batch == batch)
- return;
-
- fd_batch_write_prep(batch, rsc);
-
- if (rsc->stencil)
- fd_batch_resource_write(batch, rsc->stencil);
-
- /* note, invalidate write batch, to avoid further writes to rsc
- * resulting in a write-after-read hazard.
- */
- /* if we are pending read or write by any other batch: */
- if (unlikely(rsc->track->batch_mask & ~(1 << batch->idx))) {
- struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
- struct fd_batch *dep;
-
- if (rsc->track->write_batch)
- flush_write_batch(rsc);
-
- foreach_batch (dep, cache, rsc->track->batch_mask) {
- struct fd_batch *b = NULL;
- if (dep == batch)
- continue;
- /* note that batch_add_dep could flush and unref dep, so
- * we need to hold a reference to keep it live for the
- * fd_bc_invalidate_batch()
- */
- fd_batch_reference(&b, dep);
- fd_batch_add_dep(batch, b);
- fd_bc_invalidate_batch(b, false);
- fd_batch_reference_locked(&b, NULL);
- }
- }
- fd_batch_reference_locked(&rsc->track->write_batch, batch);
-
- fd_batch_add_resource(batch, rsc);
+ fd_screen_assert_locked(batch->ctx->screen);
+
+ DBG("%p: write %p", batch, rsc);
+
+ /* Must do this before the early out, so we unset a previous resource
+ * invalidate (which may have left the write_batch state in place).
+ */
+ rsc->valid = true;
+
+ if (rsc->track->write_batch == batch)
+ return;
+
+ fd_batch_write_prep(batch, rsc);
+
+ if (rsc->stencil)
+ fd_batch_resource_write(batch, rsc->stencil);
+
+ /* note, invalidate write batch, to avoid further writes to rsc
+ * resulting in a write-after-read hazard.
+ */
+ /* if we are pending read or write by any other batch: */
+ if (unlikely(rsc->track->batch_mask & ~(1 << batch->idx))) {
+ struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
+ struct fd_batch *dep;
+
+ if (rsc->track->write_batch)
+ flush_write_batch(rsc);
+
+ foreach_batch(dep, cache, rsc->track->batch_mask)
+ {
+ struct fd_batch *b = NULL;
+ if (dep == batch)
+ continue;
+ /* note that batch_add_dep could flush and unref dep, so
+ * we need to hold a reference to keep it live for the
+ * fd_bc_invalidate_batch()
+ */
+ fd_batch_reference(&b, dep);
+ fd_batch_add_dep(batch, b);
+ fd_bc_invalidate_batch(b, false);
+ fd_batch_reference_locked(&b, NULL);
+ }
+ }
+ fd_batch_reference_locked(&rsc->track->write_batch, batch);
+
+ fd_batch_add_resource(batch, rsc);
}
void
fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc)
{
- fd_screen_assert_locked(batch->ctx->screen);
+ fd_screen_assert_locked(batch->ctx->screen);
- if (rsc->stencil)
- fd_batch_resource_read(batch, rsc->stencil);
+ if (rsc->stencil)
+ fd_batch_resource_read(batch, rsc->stencil);
- DBG("%p: read %p", batch, rsc);
+ DBG("%p: read %p", batch, rsc);
- /* If reading a resource pending a write, go ahead and flush the
- * writer. This avoids situations where we end up having to
- * flush the current batch in _resource_used()
- */
- if (unlikely(rsc->track->write_batch && rsc->track->write_batch != batch))
- flush_write_batch(rsc);
+ /* If reading a resource pending a write, go ahead and flush the
+ * writer. This avoids situations where we end up having to
+ * flush the current batch in _resource_used()
+ */
+ if (unlikely(rsc->track->write_batch && rsc->track->write_batch != batch))
+ flush_write_batch(rsc);
- fd_batch_add_resource(batch, rsc);
+ fd_batch_add_resource(batch, rsc);
}
void
fd_batch_check_size(struct fd_batch *batch)
{
- debug_assert(!batch->flushed);
-
- if (FD_DBG(FLUSH)) {
- fd_batch_flush(batch);
- return;
- }
-
- /* Place a reasonable upper bound on prim/draw stream buffer size: */
- const unsigned limit_bits = 8 * 8 * 1024 * 1024;
- if ((batch->prim_strm_bits > limit_bits) || (batch->draw_strm_bits > limit_bits)) {
- fd_batch_flush(batch);
- return;
- }
-
- if (fd_device_version(batch->ctx->screen->dev) >= FD_VERSION_UNLIMITED_CMDS)
- return;
-
- struct fd_ringbuffer *ring = batch->draw;
- if ((ring->cur - ring->start) > (ring->size/4 - 0x1000))
- fd_batch_flush(batch);
+ debug_assert(!batch->flushed);
+
+ if (FD_DBG(FLUSH)) {
+ fd_batch_flush(batch);
+ return;
+ }
+
+ /* Place a reasonable upper bound on prim/draw stream buffer size: */
+ const unsigned limit_bits = 8 * 8 * 1024 * 1024;
+ if ((batch->prim_strm_bits > limit_bits) ||
+ (batch->draw_strm_bits > limit_bits)) {
+ fd_batch_flush(batch);
+ return;
+ }
+
+ if (fd_device_version(batch->ctx->screen->dev) >= FD_VERSION_UNLIMITED_CMDS)
+ return;
+
+ struct fd_ringbuffer *ring = batch->draw;
+ if ((ring->cur - ring->start) > (ring->size / 4 - 0x1000))
+ fd_batch_flush(batch);
}
/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already
void
fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- if (batch->needs_wfi) {
- if (batch->ctx->screen->gpu_id >= 500)
- OUT_WFI5(ring);
- else
- OUT_WFI(ring);
- batch->needs_wfi = false;
- }
+ if (batch->needs_wfi) {
+ if (batch->ctx->screen->gpu_id >= 500)
+ OUT_WFI5(ring);
+ else
+ OUT_WFI(ring);
+ batch->needs_wfi = false;
+ }
}
#ifndef FREEDRENO_BATCH_H_
#define FREEDRENO_BATCH_H_
+#include "util/list.h"
+#include "util/simple_mtx.h"
#include "util/u_inlines.h"
#include "util/u_queue.h"
#include "util/u_trace.h"
-#include "util/list.h"
-#include "util/simple_mtx.h"
#include "freedreno_context.h"
#include "freedreno_util.h"
#ifdef DEBUG
-# define BATCH_DEBUG FD_DBG(MSGS)
+#define BATCH_DEBUG FD_DBG(MSGS)
#else
-# define BATCH_DEBUG 0
+#define BATCH_DEBUG 0
#endif
struct fd_resource;
* fd_resource-s, etc.
*/
struct fd_batch {
- struct pipe_reference reference;
- unsigned seqno;
- unsigned idx; /* index into cache->batches[] */
-
- struct u_trace trace;
-
- /* To detect cases where we can skip cmdstream to record timestamp: */
- uint32_t *last_timestamp_cmd;
-
- int in_fence_fd;
- bool needs_out_fence_fd;
- struct pipe_fence_handle *fence;
-
- struct fd_context *ctx;
-
- /* emit_lock serializes cmdstream emission and flush. Acquire before
- * screen->lock.
- */
- simple_mtx_t submit_lock;
-
- /* do we need to mem2gmem before rendering. We don't, if for example,
- * there was a glClear() that invalidated the entire previous buffer
- * contents. Keep track of which buffer(s) are cleared, or needs
- * restore. Masks of PIPE_CLEAR_*
- *
- * The 'cleared' bits will be set for buffers which are *entirely*
- * cleared, and 'partial_cleared' bits will be set if you must
- * check cleared_scissor.
- *
- * The 'invalidated' bits are set for cleared buffers, and buffers
- * where the contents are undefined, ie. what we don't need to restore
- * to gmem.
- */
- enum {
- /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
- FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
- FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
- FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
- FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
- } invalidated, cleared, fast_cleared, restore, resolve;
-
- /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
- bool nondraw : 1;
- bool needs_flush : 1;
- bool flushed : 1;
- bool blit : 1;
- bool back_blit : 1; /* only blit so far is resource shadowing back-blit */
- bool tessellation : 1; /* tessellation used in batch */
-
- /* Keep track if WAIT_FOR_IDLE is needed for registers we need
- * to update via RMW:
- */
- bool needs_wfi : 1;
-
- /* To decide whether to render to system memory, keep track of the
- * number of draws, and whether any of them require multisample,
- * depth_test (or depth write), stencil_test, blending, and
- * color_logic_Op (since those functions are disabled when by-
- * passing GMEM.
- */
- enum fd_gmem_reason gmem_reason;
-
- /* At submit time, once we've decided that this batch will use GMEM
- * rendering, the appropriate gmem state is looked up:
- */
- const struct fd_gmem_stateobj *gmem_state;
-
- /* A calculated "draw cost" value for the batch, which tries to
- * estimate the bandwidth-per-sample of all the draws according
- * to:
- *
- * foreach_draw (...) {
- * cost += num_mrt;
- * if (blend_enabled)
- * cost += num_mrt;
- * if (depth_test_enabled)
- * cost++;
- * if (depth_write_enabled)
- * cost++;
- * }
- *
- * The idea is that each sample-passed minimally does one write
- * per MRT. If blend is enabled, the hw will additionally do
- * a framebuffer read per sample-passed (for each MRT with blend
- * enabled). If depth-test is enabled, the hw will additionally
- * a depth buffer read. If depth-write is enable, the hw will
- * additionally do a depth buffer write.
- *
- * This does ignore depth buffer traffic for samples which do not
- * pass do to depth-test fail, and some other details. But it is
- * just intended to be a rough estimate that is easy to calculate.
- */
- unsigned cost;
-
- /* Tells the gen specific backend where to write stats used for
- * the autotune module.
- *
- * Pointer only valid during gmem emit code.
- */
- struct fd_batch_result *autotune_result;
-
- unsigned num_draws; /* number of draws in current batch */
- unsigned num_vertices; /* number of vertices in current batch */
-
- /* Currently only used on a6xx, to calculate vsc prim/draw stream
- * sizes:
- */
- unsigned num_bins_per_pipe;
- unsigned prim_strm_bits;
- unsigned draw_strm_bits;
-
- /* Track the maximal bounds of the scissor of all the draws within a
- * batch. Used at the tile rendering step (fd_gmem_render_tiles(),
- * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
- */
- struct pipe_scissor_state max_scissor;
-
- /* Keep track of DRAW initiators that need to be patched up depending
- * on whether we using binning or not:
- */
- struct util_dynarray draw_patches;
-
- /* texture state that needs patching for fb_read: */
- struct util_dynarray fb_read_patches;
-
- /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
- * once we know whether or not to use GMEM, and GMEM tile pitch.
- *
- * (only for a3xx.. but having gen specific subclasses of fd_batch
- * seemed overkill for now)
- */
- struct util_dynarray rbrc_patches;
-
- /* Keep track of GMEM related values that need to be patched up once we
- * know the gmem layout:
- */
- struct util_dynarray gmem_patches;
-
- /* Keep track of pointer to start of MEM exports for a20x binning shaders
- *
- * this is so the end of the shader can be cut off at the right point
- * depending on the GMEM configuration
- */
- struct util_dynarray shader_patches;
-
- struct pipe_framebuffer_state framebuffer;
-
- struct fd_submit *submit;
-
- /** draw pass cmdstream: */
- struct fd_ringbuffer *draw;
- /** binning pass cmdstream: */
- struct fd_ringbuffer *binning;
- /** tiling/gmem (IB0) cmdstream: */
- struct fd_ringbuffer *gmem;
-
- /** preemble cmdstream (executed once before first tile): */
- struct fd_ringbuffer *prologue;
-
- /** epilogue cmdstream (executed after each tile): */
- struct fd_ringbuffer *epilogue;
-
- struct fd_ringbuffer *tile_setup;
- struct fd_ringbuffer *tile_fini;
-
- union pipe_color_union clear_color[MAX_RENDER_TARGETS];
- double clear_depth;
- unsigned clear_stencil;
-
- /**
- * hw query related state:
- */
- /*@{*/
- /* next sample offset.. incremented for each sample in the batch/
- * submit, reset to zero on next submit.
- */
- uint32_t next_sample_offset;
-
- /* cached samples (in case multiple queries need to reference
- * the same sample snapshot)
- */
- struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
-
- /* which sample providers were used in the current batch: */
- uint32_t query_providers_used;
-
- /* which sample providers are currently enabled in the batch: */
- uint32_t query_providers_active;
-
- /* list of samples in current batch: */
- struct util_dynarray samples;
-
- /* current query result bo and tile stride: */
- struct pipe_resource *query_buf;
- uint32_t query_tile_stride;
- /*@}*/
-
-
- /* Set of resources used by currently-unsubmitted batch (read or
- * write).. does not hold a reference to the resource.
- */
- struct set *resources;
-
- /** key in batch-cache (if not null): */
- const struct fd_batch_key *key;
- uint32_t hash;
-
- /** set of dependent batches.. holds refs to dependent batches: */
- uint32_t dependents_mask;
-
- /* Buffer for tessellation engine input
- */
- struct fd_bo *tessfactor_bo;
- uint32_t tessfactor_size;
-
- /* Buffer for passing parameters between TCS and TES
- */
- struct fd_bo *tessparam_bo;
- uint32_t tessparam_size;
-
- struct fd_ringbuffer *tess_addrs_constobj;
+ struct pipe_reference reference;
+ unsigned seqno;
+ unsigned idx; /* index into cache->batches[] */
+
+ struct u_trace trace;
+
+ /* To detect cases where we can skip cmdstream to record timestamp: */
+ uint32_t *last_timestamp_cmd;
+
+ int in_fence_fd;
+ bool needs_out_fence_fd;
+ struct pipe_fence_handle *fence;
+
+ struct fd_context *ctx;
+
+ /* emit_lock serializes cmdstream emission and flush. Acquire before
+ * screen->lock.
+ */
+ simple_mtx_t submit_lock;
+
+ /* do we need to mem2gmem before rendering. We don't, if for example,
+ * there was a glClear() that invalidated the entire previous buffer
+    * contents. Keep track of which buffer(s) are cleared, or need
+ * restore. Masks of PIPE_CLEAR_*
+ *
+ * The 'cleared' bits will be set for buffers which are *entirely*
+ * cleared, and 'partial_cleared' bits will be set if you must
+ * check cleared_scissor.
+ *
+ * The 'invalidated' bits are set for cleared buffers, and buffers
+ * where the contents are undefined, ie. what we don't need to restore
+ * to gmem.
+ */
+ enum {
+ /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
+ FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
+ FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
+ FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
+ FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
+ } invalidated,
+ cleared, fast_cleared, restore, resolve;
+
+ /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
+ bool nondraw : 1;
+ bool needs_flush : 1;
+ bool flushed : 1;
+ bool blit : 1;
+ bool back_blit : 1; /* only blit so far is resource shadowing back-blit */
+ bool tessellation : 1; /* tessellation used in batch */
+
+ /* Keep track if WAIT_FOR_IDLE is needed for registers we need
+ * to update via RMW:
+ */
+ bool needs_wfi : 1;
+
+ /* To decide whether to render to system memory, keep track of the
+ * number of draws, and whether any of them require multisample,
+ * depth_test (or depth write), stencil_test, blending, and
+    * color_logic_op (since those functions are disabled when by-
+    * passing GMEM).
+ */
+ enum fd_gmem_reason gmem_reason;
+
+ /* At submit time, once we've decided that this batch will use GMEM
+ * rendering, the appropriate gmem state is looked up:
+ */
+ const struct fd_gmem_stateobj *gmem_state;
+
+ /* A calculated "draw cost" value for the batch, which tries to
+ * estimate the bandwidth-per-sample of all the draws according
+ * to:
+ *
+ * foreach_draw (...) {
+ * cost += num_mrt;
+ * if (blend_enabled)
+ * cost += num_mrt;
+ * if (depth_test_enabled)
+ * cost++;
+ * if (depth_write_enabled)
+ * cost++;
+ * }
+ *
+ * The idea is that each sample-passed minimally does one write
+ * per MRT. If blend is enabled, the hw will additionally do
+ * a framebuffer read per sample-passed (for each MRT with blend
+    * enabled). If depth-test is enabled, the hw will additionally do
+    * a depth buffer read. If depth-write is enabled, the hw will
+    * additionally do a depth buffer write.
+    *
+    * This does ignore depth buffer traffic for samples which do not
+    * pass due to depth-test fail, and some other details. But it is
+ * just intended to be a rough estimate that is easy to calculate.
+ */
+ unsigned cost;
+
+ /* Tells the gen specific backend where to write stats used for
+ * the autotune module.
+ *
+ * Pointer only valid during gmem emit code.
+ */
+ struct fd_batch_result *autotune_result;
+
+ unsigned num_draws; /* number of draws in current batch */
+ unsigned num_vertices; /* number of vertices in current batch */
+
+ /* Currently only used on a6xx, to calculate vsc prim/draw stream
+ * sizes:
+ */
+ unsigned num_bins_per_pipe;
+ unsigned prim_strm_bits;
+ unsigned draw_strm_bits;
+
+ /* Track the maximal bounds of the scissor of all the draws within a
+ * batch. Used at the tile rendering step (fd_gmem_render_tiles(),
+ * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
+ */
+ struct pipe_scissor_state max_scissor;
+
+ /* Keep track of DRAW initiators that need to be patched up depending
+    * on whether we are using binning or not:
+ */
+ struct util_dynarray draw_patches;
+
+ /* texture state that needs patching for fb_read: */
+ struct util_dynarray fb_read_patches;
+
+ /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
+ * once we know whether or not to use GMEM, and GMEM tile pitch.
+ *
+ * (only for a3xx.. but having gen specific subclasses of fd_batch
+ * seemed overkill for now)
+ */
+ struct util_dynarray rbrc_patches;
+
+ /* Keep track of GMEM related values that need to be patched up once we
+ * know the gmem layout:
+ */
+ struct util_dynarray gmem_patches;
+
+ /* Keep track of pointer to start of MEM exports for a20x binning shaders
+ *
+ * this is so the end of the shader can be cut off at the right point
+ * depending on the GMEM configuration
+ */
+ struct util_dynarray shader_patches;
+
+ struct pipe_framebuffer_state framebuffer;
+
+ struct fd_submit *submit;
+
+ /** draw pass cmdstream: */
+ struct fd_ringbuffer *draw;
+ /** binning pass cmdstream: */
+ struct fd_ringbuffer *binning;
+ /** tiling/gmem (IB0) cmdstream: */
+ struct fd_ringbuffer *gmem;
+
+   /** preamble cmdstream (executed once before first tile): */
+ struct fd_ringbuffer *prologue;
+
+ /** epilogue cmdstream (executed after each tile): */
+ struct fd_ringbuffer *epilogue;
+
+ struct fd_ringbuffer *tile_setup;
+ struct fd_ringbuffer *tile_fini;
+
+ union pipe_color_union clear_color[MAX_RENDER_TARGETS];
+ double clear_depth;
+ unsigned clear_stencil;
+
+ /**
+ * hw query related state:
+ */
+ /*@{*/
+ /* next sample offset.. incremented for each sample in the batch/
+ * submit, reset to zero on next submit.
+ */
+ uint32_t next_sample_offset;
+
+ /* cached samples (in case multiple queries need to reference
+ * the same sample snapshot)
+ */
+ struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
+
+ /* which sample providers were used in the current batch: */
+ uint32_t query_providers_used;
+
+ /* which sample providers are currently enabled in the batch: */
+ uint32_t query_providers_active;
+
+ /* list of samples in current batch: */
+ struct util_dynarray samples;
+
+ /* current query result bo and tile stride: */
+ struct pipe_resource *query_buf;
+ uint32_t query_tile_stride;
+ /*@}*/
+
+ /* Set of resources used by currently-unsubmitted batch (read or
+ * write).. does not hold a reference to the resource.
+ */
+ struct set *resources;
+
+ /** key in batch-cache (if not null): */
+ const struct fd_batch_key *key;
+ uint32_t hash;
+
+ /** set of dependent batches.. holds refs to dependent batches: */
+ uint32_t dependents_mask;
+
+ /* Buffer for tessellation engine input
+ */
+ struct fd_bo *tessfactor_bo;
+ uint32_t tessfactor_size;
+
+ /* Buffer for passing parameters between TCS and TES
+ */
+ struct fd_bo *tessparam_bo;
+ uint32_t tessparam_size;
+
+ struct fd_ringbuffer *tess_addrs_constobj;
};
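
The max_scissor field above accumulates the union of all draw scissors in a batch, so the tile-rendering step only moves the covered region in and out of gmem. A minimal standalone sketch of that accumulation (illustrative only; struct scissor and the names here are hypothetical, not the driver's types):

   #include <stdint.h>
   #include <stdio.h>

   /* stand-in for pipe_scissor_state: a min/max rectangle */
   struct scissor {
      unsigned minx, miny, maxx, maxy;
   };

   /* Start from an "empty" rect (min = ~0, max = 0) and grow it per
    * draw, mirroring how a batch tracks the union of draw scissors.
    */
   static void
   scissor_union(struct scissor *bounds, const struct scissor *draw)
   {
      if (draw->minx < bounds->minx) bounds->minx = draw->minx;
      if (draw->miny < bounds->miny) bounds->miny = draw->miny;
      if (draw->maxx > bounds->maxx) bounds->maxx = draw->maxx;
      if (draw->maxy > bounds->maxy) bounds->maxy = draw->maxy;
   }

   int
   main(void)
   {
      struct scissor bounds = {~0u, ~0u, 0, 0};
      struct scissor draws[] = {{10, 10, 80, 60}, {40, 5, 120, 50}};

      for (unsigned i = 0; i < 2; i++)
         scissor_union(&bounds, &draws[i]);

      /* prints 10,5 .. 120,60 */
      printf("%u,%u .. %u,%u\n", bounds.minx, bounds.miny, bounds.maxx,
             bounds.maxy);
      return 0;
   }
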
-struct fd_batch * fd_batch_create(struct fd_context *ctx, bool nondraw);
+struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);
void fd_batch_reset(struct fd_batch *batch) assert_dt;
void fd_batch_flush(struct fd_batch *batch) assert_dt;
void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
-void fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc) assert_dt;
-void fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc) assert_dt;
+void fd_batch_resource_write(struct fd_batch *batch,
+ struct fd_resource *rsc) assert_dt;
+void fd_batch_resource_read_slowpath(struct fd_batch *batch,
+ struct fd_resource *rsc) assert_dt;
void fd_batch_check_size(struct fd_batch *batch) assert_dt;
uint32_t fd_batch_key_hash(const void *_key);
bool fd_batch_key_equals(const void *_a, const void *_b);
-struct fd_batch_key * fd_batch_key_clone(void *mem_ctx, const struct fd_batch_key *key);
+struct fd_batch_key *fd_batch_key_clone(void *mem_ctx,
+ const struct fd_batch_key *key);
/* not called directly: */
-void __fd_batch_describe(char* buf, const struct fd_batch *batch) assert_dt;
+void __fd_batch_describe(char *buf, const struct fd_batch *batch) assert_dt;
void __fd_batch_destroy(struct fd_batch *batch);
/*
static inline void
fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
{
- struct fd_batch *old_batch = *ptr;
+ struct fd_batch *old_batch = *ptr;
- /* only need lock if a reference is dropped: */
- if (old_batch)
- fd_screen_assert_locked(old_batch->ctx->screen);
+ /* only need lock if a reference is dropped: */
+ if (old_batch)
+ fd_screen_assert_locked(old_batch->ctx->screen);
- if (pipe_reference_described(&(*ptr)->reference, &batch->reference,
- (debug_reference_descriptor)__fd_batch_describe))
- __fd_batch_destroy(old_batch);
+ if (pipe_reference_described(
+ &(*ptr)->reference, &batch->reference,
+ (debug_reference_descriptor)__fd_batch_describe))
+ __fd_batch_destroy(old_batch);
- *ptr = batch;
+ *ptr = batch;
}
static inline void
fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
{
- struct fd_batch *old_batch = *ptr;
- struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;
+ struct fd_batch *old_batch = *ptr;
+ struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;
- if (ctx)
- fd_screen_lock(ctx->screen);
+ if (ctx)
+ fd_screen_lock(ctx->screen);
- fd_batch_reference_locked(ptr, batch);
+ fd_batch_reference_locked(ptr, batch);
- if (ctx)
- fd_screen_unlock(ctx->screen);
+ if (ctx)
+ fd_screen_unlock(ctx->screen);
}
static inline void
fd_batch_unlock_submit(struct fd_batch *batch)
{
- simple_mtx_unlock(&batch->submit_lock);
+ simple_mtx_unlock(&batch->submit_lock);
}
/**
static inline bool MUST_CHECK
fd_batch_lock_submit(struct fd_batch *batch)
{
- simple_mtx_lock(&batch->submit_lock);
- bool ret = !batch->flushed;
- if (!ret)
- fd_batch_unlock_submit(batch);
- return ret;
+ simple_mtx_lock(&batch->submit_lock);
+ bool ret = !batch->flushed;
+ if (!ret)
+ fd_batch_unlock_submit(batch);
+ return ret;
}
/* Since we reorder batches and can pause/resume queries (notably for disabling
 * queries during some meta operations), we update the current query state for
* the batch before each draw.
*/
-static inline void fd_batch_update_queries(struct fd_batch *batch)
- assert_dt
+static inline void
+fd_batch_update_queries(struct fd_batch *batch) assert_dt
{
- struct fd_context *ctx = batch->ctx;
+ struct fd_context *ctx = batch->ctx;
- if (ctx->query_update_batch)
- ctx->query_update_batch(batch, false);
+ if (ctx->query_update_batch)
+ ctx->query_update_batch(batch, false);
}
-static inline void fd_batch_finish_queries(struct fd_batch *batch)
- assert_dt
+static inline void
+fd_batch_finish_queries(struct fd_batch *batch) assert_dt
{
- struct fd_context *ctx = batch->ctx;
+ struct fd_context *ctx = batch->ctx;
- if (ctx->query_update_batch)
- ctx->query_update_batch(batch, true);
+ if (ctx->query_update_batch)
+ ctx->query_update_batch(batch, true);
}
static inline void
fd_reset_wfi(struct fd_batch *batch)
{
- batch->needs_wfi = true;
+ batch->needs_wfi = true;
}
void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;
*/
static inline void
fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
- enum vgt_event_type evt)
+ enum vgt_event_type evt)
{
- OUT_PKT3(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, evt);
- fd_reset_wfi(batch);
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, evt);
+ fd_reset_wfi(batch);
}
/* Get per-tile epilogue */
static inline struct fd_ringbuffer *
fd_batch_get_epilogue(struct fd_batch *batch)
{
- if (batch->epilogue == NULL)
- batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000, 0);
+ if (batch->epilogue == NULL)
+ batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000, 0);
- return batch->epilogue;
+ return batch->epilogue;
}
-struct fd_ringbuffer * fd_batch_get_prologue(struct fd_batch *batch);
+struct fd_ringbuffer *fd_batch_get_prologue(struct fd_batch *batch);
#endif /* FREEDRENO_BATCH_H_ */
*/
#include "util/hash_table.h"
-#include "util/set.h"
#include "util/list.h"
+#include "util/set.h"
#include "util/u_string.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
*/
struct fd_batch_key {
- uint32_t width;
- uint32_t height;
- uint16_t layers;
- uint16_t samples;
- uint16_t num_surfs;
- uint16_t ctx_seqno;
- struct {
- struct pipe_resource *texture;
- union pipe_surface_desc u;
- uint8_t pos, samples;
- uint16_t format;
- } surf[0];
+ uint32_t width;
+ uint32_t height;
+ uint16_t layers;
+ uint16_t samples;
+ uint16_t num_surfs;
+ uint16_t ctx_seqno;
+ struct {
+ struct pipe_resource *texture;
+ union pipe_surface_desc u;
+ uint8_t pos, samples;
+ uint16_t format;
+ } surf[0];
};
static struct fd_batch_key *
key_alloc(unsigned num_surfs)
{
- struct fd_batch_key *key =
- CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_key, sizeof(key->surf[0]) * num_surfs);
- return key;
+ struct fd_batch_key *key = CALLOC_VARIANT_LENGTH_STRUCT(
+ fd_batch_key, sizeof(key->surf[0]) * num_surfs);
+ return key;
}
uint32_t
fd_batch_key_hash(const void *_key)
{
- const struct fd_batch_key *key = _key;
- uint32_t hash = 0;
- hash = XXH32(key, offsetof(struct fd_batch_key, surf[0]), hash);
- hash = XXH32(key->surf, sizeof(key->surf[0]) * key->num_surfs , hash);
- return hash;
+ const struct fd_batch_key *key = _key;
+ uint32_t hash = 0;
+ hash = XXH32(key, offsetof(struct fd_batch_key, surf[0]), hash);
+ hash = XXH32(key->surf, sizeof(key->surf[0]) * key->num_surfs, hash);
+ return hash;
}
bool
fd_batch_key_equals(const void *_a, const void *_b)
{
- const struct fd_batch_key *a = _a;
- const struct fd_batch_key *b = _b;
- return (memcmp(a, b, offsetof(struct fd_batch_key, surf[0])) == 0) &&
- (memcmp(a->surf, b->surf, sizeof(a->surf[0]) * a->num_surfs) == 0);
+ const struct fd_batch_key *a = _a;
+ const struct fd_batch_key *b = _b;
+ return (memcmp(a, b, offsetof(struct fd_batch_key, surf[0])) == 0) &&
+ (memcmp(a->surf, b->surf, sizeof(a->surf[0]) * a->num_surfs) == 0);
}
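
The hash and equality functions above cover the fixed key header via offsetof() and then only the used portion of the trailing surf[] array. A self-contained sketch of the same pattern, with FNV-1a standing in for XXH32 and hypothetical names; it assumes the key allocation is zeroed (as CALLOC guarantees for the real key) so padding bytes compare equal:

   #include <stddef.h>
   #include <stdint.h>
   #include <stdio.h>
   #include <stdlib.h>
   #include <string.h>

   struct surf { uint32_t id; };

   struct key {
      uint32_t width, height;
      uint16_t num_surfs;
      struct surf surf[]; /* flexible array member, like fd_batch_key */
   };

   /* FNV-1a standing in for XXH32: hash the fixed header up to the
    * flexible array, then only the used portion of the array.
    */
   static uint32_t
   fnv1a(const void *data, size_t len, uint32_t hash)
   {
      const unsigned char *p = data;
      for (size_t i = 0; i < len; i++)
         hash = (hash ^ p[i]) * 16777619u;
      return hash;
   }

   static uint32_t
   key_hash(const struct key *k)
   {
      uint32_t h = fnv1a(k, offsetof(struct key, surf), 2166136261u);
      return fnv1a(k->surf, sizeof(k->surf[0]) * k->num_surfs, h);
   }

   static int
   key_equals(const struct key *a, const struct key *b)
   {
      return memcmp(a, b, offsetof(struct key, surf)) == 0 &&
             memcmp(a->surf, b->surf, sizeof(a->surf[0]) * a->num_surfs) == 0;
   }

   int
   main(void)
   {
      size_t sz = offsetof(struct key, surf) + 2 * sizeof(struct surf);
      /* calloc zeroes padding bytes, so hashing/comparing raw header
       * bytes is deterministic */
      struct key *a = calloc(1, sz), *b = calloc(1, sz);

      a->width = b->width = 64;
      a->num_surfs = b->num_surfs = 2;
      a->surf[0].id = b->surf[0].id = 7;

      printf("equal=%d, hashes match=%d\n", key_equals(a, b),
             key_hash(a) == key_hash(b));

      free(a);
      free(b);
      return 0;
   }
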
struct fd_batch_key *
fd_batch_key_clone(void *mem_ctx, const struct fd_batch_key *key)
{
- unsigned sz = sizeof(struct fd_batch_key) + (sizeof(key->surf[0]) * key->num_surfs);
- struct fd_batch_key *new_key = rzalloc_size(mem_ctx, sz);
- memcpy(new_key, key, sz);
- return new_key;
+ unsigned sz =
+ sizeof(struct fd_batch_key) + (sizeof(key->surf[0]) * key->num_surfs);
+ struct fd_batch_key *new_key = rzalloc_size(mem_ctx, sz);
+ memcpy(new_key, key, sz);
+ return new_key;
}
void
fd_bc_init(struct fd_batch_cache *cache)
{
- cache->ht = _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
+ cache->ht =
+ _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
}
void
fd_bc_fini(struct fd_batch_cache *cache)
{
- _mesa_hash_table_destroy(cache->ht, NULL);
+ _mesa_hash_table_destroy(cache->ht, NULL);
}
static void
-bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx, bool deferred)
- assert_dt
+bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx,
+ bool deferred) assert_dt
{
- /* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly)
- * can cause batches to be unref'd and freed under our feet, so grab
- * a reference to all the batches we need up-front.
- */
- struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
- struct fd_batch *batch;
- unsigned n = 0;
-
- fd_screen_lock(ctx->screen);
-
- foreach_batch(batch, cache, cache->batch_mask) {
- if (batch->ctx == ctx) {
- fd_batch_reference_locked(&batches[n++], batch);
- }
- }
-
- if (deferred) {
- struct fd_batch *current_batch = fd_context_batch(ctx);
-
- for (unsigned i = 0; i < n; i++) {
- if (batches[i] && (batches[i]->ctx == ctx) &&
- (batches[i] != current_batch)) {
- fd_batch_add_dep(current_batch, batches[i]);
- }
- }
-
- fd_batch_reference_locked(&current_batch, NULL);
-
- fd_screen_unlock(ctx->screen);
- } else {
- fd_screen_unlock(ctx->screen);
-
- for (unsigned i = 0; i < n; i++) {
- fd_batch_flush(batches[i]);
- }
- }
-
- for (unsigned i = 0; i < n; i++) {
- fd_batch_reference(&batches[i], NULL);
- }
+ /* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly)
+ * can cause batches to be unref'd and freed under our feet, so grab
+ * a reference to all the batches we need up-front.
+ */
+ struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
+ struct fd_batch *batch;
+ unsigned n = 0;
+
+ fd_screen_lock(ctx->screen);
+
+ foreach_batch(batch, cache, cache->batch_mask)
+ {
+ if (batch->ctx == ctx) {
+ fd_batch_reference_locked(&batches[n++], batch);
+ }
+ }
+
+ if (deferred) {
+ struct fd_batch *current_batch = fd_context_batch(ctx);
+
+ for (unsigned i = 0; i < n; i++) {
+ if (batches[i] && (batches[i]->ctx == ctx) &&
+ (batches[i] != current_batch)) {
+ fd_batch_add_dep(current_batch, batches[i]);
+ }
+ }
+
+ fd_batch_reference_locked(&current_batch, NULL);
+
+ fd_screen_unlock(ctx->screen);
+ } else {
+ fd_screen_unlock(ctx->screen);
+
+ for (unsigned i = 0; i < n; i++) {
+ fd_batch_flush(batches[i]);
+ }
+ }
+
+ for (unsigned i = 0; i < n; i++) {
+ fd_batch_reference(&batches[i], NULL);
+ }
}
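
bc_flush() grabs references up front because flushing (or adding a dependency) can drop batch references mid-iteration. The same take-refs-then-operate pattern in standalone form (the obj/ref names are hypothetical, not driver API):

   #include <stdio.h>
   #include <stdlib.h>

   struct obj {
      int refcount;
      int id;
   };

   static void
   obj_unref(struct obj *o)
   {
      if (--o->refcount == 0)
         free(o);
   }

   /* op() may drop the table's own reference to entries (as batch
    * flushing can), so snapshot references first and operate on the
    * snapshot.
    */
   static void
   for_all_safely(struct obj **table, unsigned n, void (*op)(struct obj *))
   {
      struct obj *snap[32];
      unsigned m = 0;

      for (unsigned i = 0; i < n && m < 32; i++) {
         if (table[i]) {
            table[i]->refcount++; /* take a ref before iterating */
            snap[m++] = table[i];
         }
      }

      for (unsigned i = 0; i < m; i++)
         op(snap[i]);

      for (unsigned i = 0; i < m; i++)
         obj_unref(snap[i]); /* drop our snapshot refs */
   }

   static void
   print_op(struct obj *o)
   {
      printf("visit %d\n", o->id);
   }

   int
   main(void)
   {
      struct obj *one = calloc(1, sizeof(*one));
      one->refcount = 1;
      one->id = 42;

      struct obj *table[2] = {one, NULL};
      for_all_safely(table, 2, print_op);

      obj_unref(one); /* drop the table's reference */
      return 0;
   }
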
void
fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
{
- bc_flush(cache, ctx, false);
+ bc_flush(cache, ctx, false);
}
/* deferred flush doesn't actually flush, but it marks every other
void
fd_bc_flush_deferred(struct fd_batch_cache *cache, struct fd_context *ctx)
{
- bc_flush(cache, ctx, true);
+ bc_flush(cache, ctx, true);
}
static bool
batch_in_cache(struct fd_batch_cache *cache, struct fd_batch *batch)
{
- struct fd_batch *b;
+ struct fd_batch *b;
- foreach_batch (b, cache, cache->batch_mask)
- if (b == batch)
- return true;
+ foreach_batch(b, cache, cache->batch_mask) if (b == batch) return true;
- return false;
+ return false;
}
void
fd_bc_dump(struct fd_screen *screen, const char *fmt, ...)
{
- struct fd_batch_cache *cache = &screen->batch_cache;
+ struct fd_batch_cache *cache = &screen->batch_cache;
- if (!BATCH_DEBUG)
- return;
+ if (!BATCH_DEBUG)
+ return;
- fd_screen_lock(screen);
+ fd_screen_lock(screen);
- va_list ap;
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
+ va_list ap;
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
- set_foreach (screen->live_batches, entry) {
- struct fd_batch *batch = (struct fd_batch *)entry->key;
- printf(" %p<%u>%s%s\n", batch, batch->seqno,
- batch->needs_flush ? ", NEEDS FLUSH" : "",
- batch_in_cache(cache, batch) ? "" : ", ORPHAN");
- }
+ set_foreach (screen->live_batches, entry) {
+ struct fd_batch *batch = (struct fd_batch *)entry->key;
+ printf(" %p<%u>%s%s\n", batch, batch->seqno,
+ batch->needs_flush ? ", NEEDS FLUSH" : "",
+ batch_in_cache(cache, batch) ? "" : ", ORPHAN");
+ }
- printf("----\n");
+ printf("----\n");
- fd_screen_unlock(screen);
+ fd_screen_unlock(screen);
}
void
fd_bc_invalidate_context(struct fd_context *ctx)
{
- struct fd_batch_cache *cache = &ctx->screen->batch_cache;
- struct fd_batch *batch;
+ struct fd_batch_cache *cache = &ctx->screen->batch_cache;
+ struct fd_batch *batch;
- fd_screen_lock(ctx->screen);
+ fd_screen_lock(ctx->screen);
- foreach_batch(batch, cache, cache->batch_mask) {
- if (batch->ctx == ctx)
- fd_bc_invalidate_batch(batch, true);
- }
+ foreach_batch(batch, cache, cache->batch_mask)
+ {
+ if (batch->ctx == ctx)
+ fd_bc_invalidate_batch(batch, true);
+ }
- fd_screen_unlock(ctx->screen);
+ fd_screen_unlock(ctx->screen);
}
/**
void
fd_bc_invalidate_batch(struct fd_batch *batch, bool remove)
{
- if (!batch)
- return;
+ if (!batch)
+ return;
- struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
- struct fd_batch_key *key = (struct fd_batch_key *)batch->key;
+ struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
+ struct fd_batch_key *key = (struct fd_batch_key *)batch->key;
- fd_screen_assert_locked(batch->ctx->screen);
+ fd_screen_assert_locked(batch->ctx->screen);
- if (remove) {
- cache->batches[batch->idx] = NULL;
- cache->batch_mask &= ~(1 << batch->idx);
- }
+ if (remove) {
+ cache->batches[batch->idx] = NULL;
+ cache->batch_mask &= ~(1 << batch->idx);
+ }
- if (!key)
- return;
+ if (!key)
+ return;
- DBG("%p: key=%p", batch, batch->key);
- for (unsigned idx = 0; idx < key->num_surfs; idx++) {
- struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
- rsc->track->bc_batch_mask &= ~(1 << batch->idx);
- }
+ DBG("%p: key=%p", batch, batch->key);
+ for (unsigned idx = 0; idx < key->num_surfs; idx++) {
+ struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
+ rsc->track->bc_batch_mask &= ~(1 << batch->idx);
+ }
- struct hash_entry *entry =
- _mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key);
- _mesa_hash_table_remove(cache->ht, entry);
+ struct hash_entry *entry =
+ _mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key);
+ _mesa_hash_table_remove(cache->ht, entry);
- batch->key = NULL;
- free(key);
+ batch->key = NULL;
+ free(key);
}
void
fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
{
- struct fd_screen *screen = fd_screen(rsc->b.b.screen);
- struct fd_batch *batch;
+ struct fd_screen *screen = fd_screen(rsc->b.b.screen);
+ struct fd_batch *batch;
- fd_screen_lock(screen);
+ fd_screen_lock(screen);
- if (destroy) {
- foreach_batch (batch, &screen->batch_cache, rsc->track->batch_mask) {
- struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
- _mesa_set_remove(batch->resources, entry);
- }
- rsc->track->batch_mask = 0;
+ if (destroy) {
+ foreach_batch(batch, &screen->batch_cache, rsc->track->batch_mask)
+ {
+ struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
+ _mesa_set_remove(batch->resources, entry);
+ }
+ rsc->track->batch_mask = 0;
- fd_batch_reference_locked(&rsc->track->write_batch, NULL);
- }
+ fd_batch_reference_locked(&rsc->track->write_batch, NULL);
+ }
- foreach_batch (batch, &screen->batch_cache, rsc->track->bc_batch_mask)
- fd_bc_invalidate_batch(batch, false);
+ foreach_batch(batch, &screen->batch_cache, rsc->track->bc_batch_mask)
+ fd_bc_invalidate_batch(batch, false);
- rsc->track->bc_batch_mask = 0;
+ rsc->track->bc_batch_mask = 0;
- fd_screen_unlock(screen);
+ fd_screen_unlock(screen);
}
static struct fd_batch *
-alloc_batch_locked(struct fd_batch_cache *cache, struct fd_context *ctx, bool nondraw)
- assert_dt
+alloc_batch_locked(struct fd_batch_cache *cache, struct fd_context *ctx,
+ bool nondraw) assert_dt
{
- struct fd_batch *batch;
- uint32_t idx;
+ struct fd_batch *batch;
+ uint32_t idx;
- fd_screen_assert_locked(ctx->screen);
+ fd_screen_assert_locked(ctx->screen);
- while ((idx = ffs(~cache->batch_mask)) == 0) {
+ while ((idx = ffs(~cache->batch_mask)) == 0) {
#if 0
for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
batch = cache->batches[i];
debug_printf("\n");
}
#endif
- /* TODO: is LRU the better policy? Or perhaps the batch that
- * depends on the fewest other batches?
- */
- struct fd_batch *flush_batch = NULL;
- for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
- if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
- fd_batch_reference_locked(&flush_batch, cache->batches[i]);
- }
-
- /* we can drop lock temporarily here, since we hold a ref,
- * flush_batch won't disappear under us.
- */
- fd_screen_unlock(ctx->screen);
- DBG("%p: too many batches! flush forced!", flush_batch);
- fd_batch_flush(flush_batch);
- fd_screen_lock(ctx->screen);
-
- /* While the resources get cleaned up automatically, the flush_batch
- * doesn't get removed from the dependencies of other batches, so
- * it won't be unref'd and will remain in the table.
- *
- * TODO maybe keep a bitmask of batches that depend on me, to make
- * this easier:
- */
- for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
- struct fd_batch *other = cache->batches[i];
- if (!other)
- continue;
- if (other->dependents_mask & (1 << flush_batch->idx)) {
- other->dependents_mask &= ~(1 << flush_batch->idx);
- struct fd_batch *ref = flush_batch;
- fd_batch_reference_locked(&ref, NULL);
- }
- }
-
- fd_batch_reference_locked(&flush_batch, NULL);
- }
-
- idx--; /* bit zero returns 1 for ffs() */
-
- batch = fd_batch_create(ctx, nondraw);
- if (!batch)
- return NULL;
-
- batch->seqno = cache->cnt++;
- batch->idx = idx;
- cache->batch_mask |= (1 << idx);
-
- debug_assert(cache->batches[idx] == NULL);
- cache->batches[idx] = batch;
-
- return batch;
+ /* TODO: is LRU the better policy? Or perhaps the batch that
+ * depends on the fewest other batches?
+ */
+ struct fd_batch *flush_batch = NULL;
+ for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
+ if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
+ fd_batch_reference_locked(&flush_batch, cache->batches[i]);
+ }
+
+ /* we can drop the lock temporarily here: since we hold a ref,
+ * flush_batch won't disappear under us.
+ */
+ fd_screen_unlock(ctx->screen);
+ DBG("%p: too many batches! flush forced!", flush_batch);
+ fd_batch_flush(flush_batch);
+ fd_screen_lock(ctx->screen);
+
+ /* While the resources get cleaned up automatically, the flush_batch
+ * doesn't get removed from the dependencies of other batches, so
+ * it won't be unref'd and will remain in the table.
+ *
+ * TODO maybe keep a bitmask of batches that depend on me, to make
+ * this easier:
+ */
+ for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
+ struct fd_batch *other = cache->batches[i];
+ if (!other)
+ continue;
+ if (other->dependents_mask & (1 << flush_batch->idx)) {
+ other->dependents_mask &= ~(1 << flush_batch->idx);
+ struct fd_batch *ref = flush_batch;
+ fd_batch_reference_locked(&ref, NULL);
+ }
+ }
+
+ fd_batch_reference_locked(&flush_batch, NULL);
+ }
+
+ idx--; /* bit zero returns 1 for ffs() */
+
+ batch = fd_batch_create(ctx, nondraw);
+ if (!batch)
+ return NULL;
+
+ batch->seqno = cache->cnt++;
+ batch->idx = idx;
+ cache->batch_mask |= (1 << idx);
+
+ debug_assert(cache->batches[idx] == NULL);
+ cache->batches[idx] = batch;
+
+ return batch;
}
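
alloc_batch_locked() finds a free slot with ffs() over the inverted mask and, when all 32 slots are busy, forces out the batch with the smallest seqno. A compilable sketch of just that slot logic (names hypothetical; the real code flushes the victim batch rather than merely clearing its bit):

   #include <stdint.h>
   #include <stdio.h>
   #include <strings.h> /* ffs() */

   #define NUM_SLOTS 32

   struct cache {
      uint32_t slot_mask;        /* bit i set => slot i in use */
      uint32_t seqno[NUM_SLOTS]; /* allocation order, used to pick a victim */
   };

   /* Find a free slot via ffs() on the inverted mask; when all 32 are
    * busy, evict the entry with the smallest seqno.
    */
   static int
   alloc_slot(struct cache *c, uint32_t next_seqno)
   {
      int idx = ffs(~c->slot_mask);

      if (idx == 0) { /* mask full: ffs(0) returns 0 */
         int victim = 0;
         for (int i = 1; i < NUM_SLOTS; i++)
            if (c->seqno[i] < c->seqno[victim])
               victim = i;
         c->slot_mask &= ~(1u << victim);
         idx = victim + 1;
      }

      idx--; /* ffs() numbers bits from 1 */
      c->slot_mask |= 1u << idx;
      c->seqno[idx] = next_seqno;
      return idx;
   }

   int
   main(void)
   {
      struct cache c = {0};
      uint32_t seqno = 0;

      for (int i = 0; i < 33; i++) /* 33rd allocation must evict */
         alloc_slot(&c, seqno++);

      printf("slot 0 seqno: %u\n", c.seqno[0]); /* 32: oldest was replaced */
      return 0;
   }
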
struct fd_batch *
-fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx, bool nondraw)
+fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx,
+ bool nondraw)
{
- struct fd_batch *batch;
+ struct fd_batch *batch;
- /* For normal draw batches, pctx->set_framebuffer_state() handles
- * this, but for nondraw batches, this is a nice central location
- * to handle them all.
- */
- if (nondraw)
- fd_context_switch_from(ctx);
+ /* For normal draw batches, pctx->set_framebuffer_state() handles
+ * this, but for nondraw batches, this is a nice central location
+ * to handle them all.
+ */
+ if (nondraw)
+ fd_context_switch_from(ctx);
- fd_screen_lock(ctx->screen);
- batch = alloc_batch_locked(cache, ctx, nondraw);
- fd_screen_unlock(ctx->screen);
+ fd_screen_lock(ctx->screen);
+ batch = alloc_batch_locked(cache, ctx, nondraw);
+ fd_screen_unlock(ctx->screen);
- if (batch && nondraw)
- fd_context_switch_to(ctx, batch);
+ if (batch && nondraw)
+ fd_context_switch_to(ctx, batch);
- return batch;
+ return batch;
}
static struct fd_batch *
batch_from_key(struct fd_batch_cache *cache, struct fd_batch_key *key,
- struct fd_context *ctx)
- assert_dt
+ struct fd_context *ctx) assert_dt
{
- struct fd_batch *batch = NULL;
- uint32_t hash = fd_batch_key_hash(key);
- struct hash_entry *entry =
- _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);
-
- if (entry) {
- free(key);
- fd_batch_reference(&batch, (struct fd_batch *)entry->data);
- return batch;
- }
-
- batch = alloc_batch_locked(cache, ctx, false);
+ struct fd_batch *batch = NULL;
+ uint32_t hash = fd_batch_key_hash(key);
+ struct hash_entry *entry =
+ _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);
+
+ if (entry) {
+ free(key);
+ fd_batch_reference(&batch, (struct fd_batch *)entry->data);
+ return batch;
+ }
+
+ batch = alloc_batch_locked(cache, ctx, false);
#ifdef DEBUG
- DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash,
- key->width, key->height, key->layers, key->samples);
- for (unsigned idx = 0; idx < key->num_surfs; idx++) {
- DBG("%p: surf[%u]: %p (%s) (%u,%u / %u,%u,%u)", batch, key->surf[idx].pos,
- key->surf[idx].texture, util_format_name(key->surf[idx].format),
- key->surf[idx].u.buf.first_element, key->surf[idx].u.buf.last_element,
- key->surf[idx].u.tex.first_layer, key->surf[idx].u.tex.last_layer,
- key->surf[idx].u.tex.level);
- }
+ DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash, key->width,
+ key->height, key->layers, key->samples);
+ for (unsigned idx = 0; idx < key->num_surfs; idx++) {
+ DBG("%p: surf[%u]: %p (%s) (%u,%u / %u,%u,%u)", batch,
+ key->surf[idx].pos, key->surf[idx].texture,
+ util_format_name(key->surf[idx].format),
+ key->surf[idx].u.buf.first_element, key->surf[idx].u.buf.last_element,
+ key->surf[idx].u.tex.first_layer, key->surf[idx].u.tex.last_layer,
+ key->surf[idx].u.tex.level);
+ }
#endif
- if (!batch)
- return NULL;
-
- /* reset max_scissor, which will be adjusted on draws
- * according to the actual scissor.
- */
- batch->max_scissor.minx = ~0;
- batch->max_scissor.miny = ~0;
- batch->max_scissor.maxx = 0;
- batch->max_scissor.maxy = 0;
-
- _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch);
- batch->key = key;
- batch->hash = hash;
-
- for (unsigned idx = 0; idx < key->num_surfs; idx++) {
- struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
- rsc->track->bc_batch_mask = (1 << batch->idx);
- }
-
- return batch;
+ if (!batch)
+ return NULL;
+
+ /* reset max_scissor, which will be adjusted on draws
+ * according to the actual scissor.
+ */
+ batch->max_scissor.minx = ~0;
+ batch->max_scissor.miny = ~0;
+ batch->max_scissor.maxx = 0;
+ batch->max_scissor.maxy = 0;
+
+ _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch);
+ batch->key = key;
+ batch->hash = hash;
+
+ for (unsigned idx = 0; idx < key->num_surfs; idx++) {
+ struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
+ rsc->track->bc_batch_mask = (1 << batch->idx);
+ }
+
+ return batch;
}
static void
-key_surf(struct fd_batch_key *key, unsigned idx, unsigned pos, struct pipe_surface *psurf)
+key_surf(struct fd_batch_key *key, unsigned idx, unsigned pos,
+ struct pipe_surface *psurf)
{
- key->surf[idx].texture = psurf->texture;
- key->surf[idx].u = psurf->u;
- key->surf[idx].pos = pos;
- key->surf[idx].samples = MAX2(1, psurf->nr_samples);
- key->surf[idx].format = psurf->format;
+ key->surf[idx].texture = psurf->texture;
+ key->surf[idx].u = psurf->u;
+ key->surf[idx].pos = pos;
+ key->surf[idx].samples = MAX2(1, psurf->nr_samples);
+ key->surf[idx].format = psurf->format;
}
struct fd_batch *
fd_batch_from_fb(struct fd_batch_cache *cache, struct fd_context *ctx,
- const struct pipe_framebuffer_state *pfb)
+ const struct pipe_framebuffer_state *pfb)
{
- unsigned idx = 0, n = pfb->nr_cbufs + (pfb->zsbuf ? 1 : 0);
- struct fd_batch_key *key = key_alloc(n);
+ unsigned idx = 0, n = pfb->nr_cbufs + (pfb->zsbuf ? 1 : 0);
+ struct fd_batch_key *key = key_alloc(n);
- key->width = pfb->width;
- key->height = pfb->height;
- key->layers = pfb->layers;
- key->samples = util_framebuffer_get_num_samples(pfb);
- key->ctx_seqno = ctx->seqno;
+ key->width = pfb->width;
+ key->height = pfb->height;
+ key->layers = pfb->layers;
+ key->samples = util_framebuffer_get_num_samples(pfb);
+ key->ctx_seqno = ctx->seqno;
- if (pfb->zsbuf)
- key_surf(key, idx++, 0, pfb->zsbuf);
+ if (pfb->zsbuf)
+ key_surf(key, idx++, 0, pfb->zsbuf);
- for (unsigned i = 0; i < pfb->nr_cbufs; i++)
- if (pfb->cbufs[i])
- key_surf(key, idx++, i + 1, pfb->cbufs[i]);
+ for (unsigned i = 0; i < pfb->nr_cbufs; i++)
+ if (pfb->cbufs[i])
+ key_surf(key, idx++, i + 1, pfb->cbufs[i]);
- key->num_surfs = idx;
+ key->num_surfs = idx;
- fd_screen_lock(ctx->screen);
- struct fd_batch *batch = batch_from_key(cache, key, ctx);
- fd_screen_unlock(ctx->screen);
+ fd_screen_lock(ctx->screen);
+ struct fd_batch *batch = batch_from_key(cache, key, ctx);
+ fd_screen_unlock(ctx->screen);
- return batch;
+ return batch;
}
struct hash_table;
struct fd_batch_cache {
- struct hash_table *ht;
- unsigned cnt;
+ struct hash_table *ht;
+ unsigned cnt;
- /* set of active batches.. there is an upper limit on the number of
- * in-flight batches, for two reasons:
- * 1) to avoid big spikes in number of batches in edge cases, such as
- * game startup (ie, lots of texture uploads, but no usages yet of
- * the textures), etc.
- * 2) so we can use a simple bitmask in fd_resource to track which
- * batches have reference to the resource
- */
- struct fd_batch *batches[32];
- uint32_t batch_mask;
+ /* set of active batches.. there is an upper limit on the number of
+ * in-flight batches, for two reasons:
+ * 1) to avoid big spikes in number of batches in edge cases, such as
+ * game startup (ie, lots of texture uploads, but no usages yet of
+ * the textures), etc.
+ * 2) so we can use a simple bitmask in fd_resource to track which
+ * batches have a reference to the resource
+ */
+ struct fd_batch *batches[32];
+ uint32_t batch_mask;
};
/* note: if batches get unref'd in the body of the loop, they are removed
* the loop into _m, we need the &= at the end of the loop to make sure
* we don't have stale bits in _m
*/
-#define foreach_batch(batch, cache, mask) \
- for (uint32_t _m = (mask); _m && ((batch) = (cache)->batches[u_bit_scan(&_m)]); _m &= (mask))
+#define foreach_batch(batch, cache, mask) \
+ for (uint32_t _m = (mask); \
+ _m && ((batch) = (cache)->batches[u_bit_scan(&_m)]); _m &= (mask))
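
The re-AND with (mask) at the end of each foreach_batch iteration is what keeps _m free of stale bits when batches get unref'd inside the loop body. A standalone demo of the same bit-scan loop shape (bit_scan() is a hypothetical stand-in for u_bit_scan(); the real macro also assigns the scanned batch pointer inside the loop condition):

   #include <stdint.h>
   #include <stdio.h>

   /* pop the lowest set bit and return its index */
   static int
   bit_scan(uint32_t *mask)
   {
      int i = __builtin_ctz(*mask);
      *mask &= *mask - 1;
      return i;
   }

   int
   main(void)
   {
      const char *slots[32] = {0};
      slots[1] = "batch-a";
      slots[4] = "batch-b";
      slots[9] = "batch-c";

      uint32_t mask = (1u << 1) | (1u << 4) | (1u << 9);

      /* same shape as foreach_batch(): copy the mask into _m, scan
       * bits, and re-AND with the live mask every iteration so bits
       * cleared inside the body are not revisited as stale entries
       */
      for (uint32_t _m = mask; _m; _m &= mask)
         printf("%s\n", slots[bit_scan(&_m)]);

      return 0;
   }
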
void fd_bc_init(struct fd_batch_cache *cache);
void fd_bc_fini(struct fd_batch_cache *cache);
-void fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx) assert_dt;
-void fd_bc_flush_deferred(struct fd_batch_cache *cache, struct fd_context *ctx) assert_dt;
-void fd_bc_dump(struct fd_screen *screen, const char *fmt, ...) _util_printf_format(2, 3);
+void fd_bc_flush(struct fd_batch_cache *cache,
+ struct fd_context *ctx) assert_dt;
+void fd_bc_flush_deferred(struct fd_batch_cache *cache,
+ struct fd_context *ctx) assert_dt;
+void fd_bc_dump(struct fd_screen *screen, const char *fmt, ...)
+ _util_printf_format(2, 3);
void fd_bc_invalidate_context(struct fd_context *ctx);
void fd_bc_invalidate_batch(struct fd_batch *batch, bool destroy);
void fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy);
-struct fd_batch * fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx, bool nondraw) assert_dt;
+struct fd_batch *fd_bc_alloc_batch(struct fd_batch_cache *cache,
+ struct fd_context *ctx,
+ bool nondraw) assert_dt;
-struct fd_batch * fd_batch_from_fb(struct fd_batch_cache *cache,
- struct fd_context *ctx, const struct pipe_framebuffer_state *pfb) assert_dt;
+struct fd_batch *
+fd_batch_from_fb(struct fd_batch_cache *cache, struct fd_context *ctx,
+ const struct pipe_framebuffer_state *pfb) assert_dt;
#endif /* FREEDRENO_BATCH_CACHE_H_ */
#include "freedreno_blitter.h"
#include "freedreno_context.h"
-#include "freedreno_resource.h"
#include "freedreno_fence.h"
+#include "freedreno_resource.h"
/* generic blit using u_blitter.. slightly modified version of util_blitter_blit
* which also handles PIPE_BUFFER:
static void
default_dst_texture(struct pipe_surface *dst_templ, struct pipe_resource *dst,
- unsigned dstlevel, unsigned dstz)
+ unsigned dstlevel, unsigned dstz)
{
- memset(dst_templ, 0, sizeof(*dst_templ));
- dst_templ->u.tex.level = dstlevel;
- dst_templ->u.tex.first_layer = dstz;
- dst_templ->u.tex.last_layer = dstz;
+ memset(dst_templ, 0, sizeof(*dst_templ));
+ dst_templ->u.tex.level = dstlevel;
+ dst_templ->u.tex.first_layer = dstz;
+ dst_templ->u.tex.last_layer = dstz;
}
static void
default_src_texture(struct pipe_sampler_view *src_templ,
- struct pipe_resource *src, unsigned srclevel)
+ struct pipe_resource *src, unsigned srclevel)
{
- bool cube_as_2darray = src->screen->get_param(src->screen,
- PIPE_CAP_SAMPLER_VIEW_TARGET);
-
- memset(src_templ, 0, sizeof(*src_templ));
-
- if (cube_as_2darray && (src->target == PIPE_TEXTURE_CUBE ||
- src->target == PIPE_TEXTURE_CUBE_ARRAY))
- src_templ->target = PIPE_TEXTURE_2D_ARRAY;
- else
- src_templ->target = src->target;
-
- if (src->target == PIPE_BUFFER) {
- src_templ->target = PIPE_TEXTURE_1D;
- }
- src_templ->u.tex.first_level = srclevel;
- src_templ->u.tex.last_level = srclevel;
- src_templ->u.tex.first_layer = 0;
- src_templ->u.tex.last_layer =
- src->target == PIPE_TEXTURE_3D ? u_minify(src->depth0, srclevel) - 1
- : (unsigned)(src->array_size - 1);
- src_templ->swizzle_r = PIPE_SWIZZLE_X;
- src_templ->swizzle_g = PIPE_SWIZZLE_Y;
- src_templ->swizzle_b = PIPE_SWIZZLE_Z;
- src_templ->swizzle_a = PIPE_SWIZZLE_W;
+ bool cube_as_2darray =
+ src->screen->get_param(src->screen, PIPE_CAP_SAMPLER_VIEW_TARGET);
+
+ memset(src_templ, 0, sizeof(*src_templ));
+
+ if (cube_as_2darray && (src->target == PIPE_TEXTURE_CUBE ||
+ src->target == PIPE_TEXTURE_CUBE_ARRAY))
+ src_templ->target = PIPE_TEXTURE_2D_ARRAY;
+ else
+ src_templ->target = src->target;
+
+ if (src->target == PIPE_BUFFER) {
+ src_templ->target = PIPE_TEXTURE_1D;
+ }
+ src_templ->u.tex.first_level = srclevel;
+ src_templ->u.tex.last_level = srclevel;
+ src_templ->u.tex.first_layer = 0;
+ src_templ->u.tex.last_layer = src->target == PIPE_TEXTURE_3D
+ ? u_minify(src->depth0, srclevel) - 1
+ : (unsigned)(src->array_size - 1);
+ src_templ->swizzle_r = PIPE_SWIZZLE_X;
+ src_templ->swizzle_g = PIPE_SWIZZLE_Y;
+ src_templ->swizzle_b = PIPE_SWIZZLE_Z;
+ src_templ->swizzle_a = PIPE_SWIZZLE_W;
}
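
The last_layer computation above differs for 3D textures because their depth shrinks per mip level, while array sizes are level-independent. A tiny sketch of that distinction (minify() is a stand-in for u_minify(); names hypothetical):

   #include <stdio.h>

   /* size of a mip level, clamped to 1 */
   static unsigned
   minify(unsigned sz, unsigned level)
   {
      unsigned s = sz >> level;
      return s ? s : 1;
   }

   /* 3D textures lose depth per mip level, so the last addressable
    * layer depends on the level; array textures keep their array size.
    */
   static unsigned
   last_layer(int is_3d, unsigned depth0, unsigned array_size, unsigned level)
   {
      return is_3d ? minify(depth0, level) - 1 : array_size - 1;
   }

   int
   main(void)
   {
      printf("3D, depth 16, level 2: %u\n", last_layer(1, 16, 0, 2));  /* 3 */
      printf("2D array of 6, level 2: %u\n", last_layer(0, 0, 6, 2));  /* 5 */
      return 0;
   }
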
static void
-fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard)
- assert_dt
+fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond,
+ bool discard) assert_dt
{
- fd_fence_ref(&ctx->last_fence, NULL);
-
- util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
- util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
- util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vs);
- util_blitter_save_tessctrl_shader(ctx->blitter, ctx->prog.hs);
- util_blitter_save_tesseval_shader(ctx->blitter, ctx->prog.ds);
- util_blitter_save_geometry_shader(ctx->blitter, ctx->prog.gs);
- util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
- ctx->streamout.targets);
- util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
- util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
- util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
- util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fs);
- util_blitter_save_blend(ctx->blitter, ctx->blend);
- util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
- util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
- util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
- util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer);
- util_blitter_save_fragment_sampler_states(ctx->blitter,
- ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers,
- (void **)ctx->tex[PIPE_SHADER_FRAGMENT].samplers);
- util_blitter_save_fragment_sampler_views(ctx->blitter,
- ctx->tex[PIPE_SHADER_FRAGMENT].num_textures,
- ctx->tex[PIPE_SHADER_FRAGMENT].textures);
- if (!render_cond)
- util_blitter_save_render_condition(ctx->blitter,
- ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
-
- if (ctx->batch)
- fd_batch_update_queries(ctx->batch);
-
- ctx->in_discard_blit = discard;
+ fd_fence_ref(&ctx->last_fence, NULL);
+
+ util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
+ util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
+ util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vs);
+ util_blitter_save_tessctrl_shader(ctx->blitter, ctx->prog.hs);
+ util_blitter_save_tesseval_shader(ctx->blitter, ctx->prog.ds);
+ util_blitter_save_geometry_shader(ctx->blitter, ctx->prog.gs);
+ util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
+ ctx->streamout.targets);
+ util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
+ util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
+ util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
+ util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fs);
+ util_blitter_save_blend(ctx->blitter, ctx->blend);
+ util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
+ util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
+ util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
+ util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer);
+ util_blitter_save_fragment_sampler_states(
+ ctx->blitter, ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers,
+ (void **)ctx->tex[PIPE_SHADER_FRAGMENT].samplers);
+ util_blitter_save_fragment_sampler_views(
+ ctx->blitter, ctx->tex[PIPE_SHADER_FRAGMENT].num_textures,
+ ctx->tex[PIPE_SHADER_FRAGMENT].textures);
+ if (!render_cond)
+ util_blitter_save_render_condition(ctx->blitter, ctx->cond_query,
+ ctx->cond_cond, ctx->cond_mode);
+
+ if (ctx->batch)
+ fd_batch_update_queries(ctx->batch);
+
+ ctx->in_discard_blit = discard;
}
static void
-fd_blitter_pipe_end(struct fd_context *ctx)
- assert_dt
+fd_blitter_pipe_end(struct fd_context *ctx) assert_dt
{
- ctx->in_discard_blit = false;
+ ctx->in_discard_blit = false;
}
bool
fd_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
{
- struct pipe_resource *dst = info->dst.resource;
- struct pipe_resource *src = info->src.resource;
- struct pipe_context *pipe = &ctx->base;
- struct pipe_surface *dst_view, dst_templ;
- struct pipe_sampler_view src_templ, *src_view;
- bool discard = false;
-
- if (!info->scissor_enable && !info->alpha_blend) {
- discard = util_texrange_covers_whole_level(info->dst.resource,
- info->dst.level, info->dst.box.x, info->dst.box.y,
- info->dst.box.z, info->dst.box.width,
- info->dst.box.height, info->dst.box.depth);
- }
-
- fd_blitter_pipe_begin(ctx, info->render_condition_enable, discard);
-
- /* Initialize the surface. */
- default_dst_texture(&dst_templ, dst, info->dst.level,
- info->dst.box.z);
- dst_templ.format = info->dst.format;
- dst_view = pipe->create_surface(pipe, dst, &dst_templ);
-
- /* Initialize the sampler view. */
- default_src_texture(&src_templ, src, info->src.level);
- src_templ.format = info->src.format;
- src_view = pipe->create_sampler_view(pipe, src, &src_templ);
-
- /* Copy. */
- util_blitter_blit_generic(ctx->blitter, dst_view, &info->dst.box,
- src_view, &info->src.box, src->width0, src->height0,
- info->mask, info->filter,
- info->scissor_enable ? &info->scissor : NULL,
- info->alpha_blend);
-
- pipe_surface_reference(&dst_view, NULL);
- pipe_sampler_view_reference(&src_view, NULL);
-
- fd_blitter_pipe_end(ctx);
-
- /* The fallback blitter must never fail: */
- return true;
+ struct pipe_resource *dst = info->dst.resource;
+ struct pipe_resource *src = info->src.resource;
+ struct pipe_context *pipe = &ctx->base;
+ struct pipe_surface *dst_view, dst_templ;
+ struct pipe_sampler_view src_templ, *src_view;
+ bool discard = false;
+
+ if (!info->scissor_enable && !info->alpha_blend) {
+ discard = util_texrange_covers_whole_level(
+ info->dst.resource, info->dst.level, info->dst.box.x, info->dst.box.y,
+ info->dst.box.z, info->dst.box.width, info->dst.box.height,
+ info->dst.box.depth);
+ }
+
+ fd_blitter_pipe_begin(ctx, info->render_condition_enable, discard);
+
+ /* Initialize the surface. */
+ default_dst_texture(&dst_templ, dst, info->dst.level, info->dst.box.z);
+ dst_templ.format = info->dst.format;
+ dst_view = pipe->create_surface(pipe, dst, &dst_templ);
+
+ /* Initialize the sampler view. */
+ default_src_texture(&src_templ, src, info->src.level);
+ src_templ.format = info->src.format;
+ src_view = pipe->create_sampler_view(pipe, src, &src_templ);
+
+ /* Copy. */
+ util_blitter_blit_generic(
+ ctx->blitter, dst_view, &info->dst.box, src_view, &info->src.box,
+ src->width0, src->height0, info->mask, info->filter,
+ info->scissor_enable ? &info->scissor : NULL, info->alpha_blend);
+
+ pipe_surface_reference(&dst_view, NULL);
+ pipe_sampler_view_reference(&src_view, NULL);
+
+ fd_blitter_pipe_end(ctx);
+
+ /* The fallback blitter must never fail: */
+ return true;
}
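
The discard flag above is only set when the blit provably overwrites the entire destination level, letting the driver skip preserving the old contents. A simplified 2D version of that test, in the spirit of util_texrange_covers_whole_level() (hypothetical name and signature):

   #include <stdio.h>

   /* a blit may discard prior destination contents only if it writes
    * every texel of the level
    */
   static int
   covers_whole_level(unsigned level_w, unsigned level_h,
                      int x, int y, int w, int h)
   {
      return x == 0 && y == 0 &&
             (unsigned)w == level_w && (unsigned)h == level_h;
   }

   int
   main(void)
   {
      printf("%d\n", covers_whole_level(256, 256, 0, 0, 256, 256));  /* 1 */
      printf("%d\n", covers_whole_level(256, 256, 16, 0, 240, 256)); /* 0 */
      return 0;
   }
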
/* Generic clear implementation (partially) using u_blitter: */
void
fd_blitter_clear(struct pipe_context *pctx, unsigned buffers,
- const union pipe_color_union *color, double depth, unsigned stencil)
+ const union pipe_color_union *color, double depth,
+ unsigned stencil)
{
- struct fd_context *ctx = fd_context(pctx);
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
- struct blitter_context *blitter = ctx->blitter;
-
- /* Note: don't use discard=true, if there was something to
- * discard, that would have been already handled in fd_clear().
- */
- fd_blitter_pipe_begin(ctx, false, false);
-
- util_blitter_save_fragment_constant_buffer_slot(ctx->blitter,
- ctx->constbuf[PIPE_SHADER_FRAGMENT].cb);
-
- util_blitter_common_clear_setup(blitter, pfb->width, pfb->height,
- buffers, NULL, NULL);
-
- struct pipe_stencil_ref sr = {
- .ref_value = { stencil & 0xff }
- };
- pctx->set_stencil_ref(pctx, sr);
-
- struct pipe_constant_buffer cb = {
- .buffer_size = 16,
- .user_buffer = &color->ui,
- };
- pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false, &cb);
-
- unsigned rs_idx = pfb->samples > 1 ? 1 : 0;
- if (!ctx->clear_rs_state[rs_idx]) {
- const struct pipe_rasterizer_state tmpl = {
- .cull_face = PIPE_FACE_NONE,
- .half_pixel_center = 1,
- .bottom_edge_rule = 1,
- .flatshade = 1,
- .depth_clip_near = 1,
- .depth_clip_far = 1,
- .multisample = pfb->samples > 1,
- };
- ctx->clear_rs_state[rs_idx] = pctx->create_rasterizer_state(pctx, &tmpl);
- }
- pctx->bind_rasterizer_state(pctx, ctx->clear_rs_state[rs_idx]);
-
- struct pipe_viewport_state vp = {
- .scale = { 0.5f * pfb->width, -0.5f * pfb->height, depth },
- .translate = { 0.5f * pfb->width, 0.5f * pfb->height, 0.0f },
- };
- pctx->set_viewport_states(pctx, 0, 1, &vp);
-
- pctx->bind_vertex_elements_state(pctx, ctx->solid_vbuf_state.vtx);
- pctx->set_vertex_buffers(pctx, blitter->vb_slot, 1, 0, false,
- &ctx->solid_vbuf_state.vertexbuf.vb[0]);
- pctx->set_stream_output_targets(pctx, 0, NULL, NULL);
-
- if (pfb->layers > 1)
- pctx->bind_vs_state(pctx, ctx->solid_layered_prog.vs);
- else
- pctx->bind_vs_state(pctx, ctx->solid_prog.vs);
-
- pctx->bind_fs_state(pctx, ctx->solid_prog.fs);
-
- /* Clear geom/tess shaders, lest the draw emit code think we are
- * trying to use use them:
- */
- pctx->bind_gs_state(pctx, NULL);
- pctx->bind_tcs_state(pctx, NULL);
- pctx->bind_tes_state(pctx, NULL);
-
- struct pipe_draw_info info = {
- .mode = PIPE_PRIM_MAX, /* maps to DI_PT_RECTLIST */
- .index_bounds_valid = true,
- .max_index = 1,
- .instance_count = MAX2(1, pfb->layers),
- };
- struct pipe_draw_start_count draw = {
- .count = 2,
- };
- pctx->draw_vbo(pctx, &info, NULL, &draw, 1);
-
- /* We expect that this should not have triggered a change in pfb: */
- assert(util_framebuffer_state_equal(pfb, &ctx->framebuffer));
-
- util_blitter_restore_constant_buffer_state(blitter);
- util_blitter_restore_vertex_states(blitter);
- util_blitter_restore_fragment_states(blitter);
- util_blitter_restore_textures(blitter);
- util_blitter_restore_fb_state(blitter);
- util_blitter_restore_render_cond(blitter);
- util_blitter_unset_running_flag(blitter);
-
- fd_blitter_pipe_end(ctx);
+ struct fd_context *ctx = fd_context(pctx);
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ struct blitter_context *blitter = ctx->blitter;
+
+ /* Note: don't use discard=true, if there was something to
+ * discard, that would have been already handled in fd_clear().
+ */
+ fd_blitter_pipe_begin(ctx, false, false);
+
+ util_blitter_save_fragment_constant_buffer_slot(
+ ctx->blitter, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb);
+
+ util_blitter_common_clear_setup(blitter, pfb->width, pfb->height, buffers,
+ NULL, NULL);
+
+ struct pipe_stencil_ref sr = {.ref_value = {stencil & 0xff}};
+ pctx->set_stencil_ref(pctx, sr);
+
+ struct pipe_constant_buffer cb = {
+ .buffer_size = 16,
+ .user_buffer = &color->ui,
+ };
+ pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false, &cb);
+
+ unsigned rs_idx = pfb->samples > 1 ? 1 : 0;
+ if (!ctx->clear_rs_state[rs_idx]) {
+ const struct pipe_rasterizer_state tmpl = {
+ .cull_face = PIPE_FACE_NONE,
+ .half_pixel_center = 1,
+ .bottom_edge_rule = 1,
+ .flatshade = 1,
+ .depth_clip_near = 1,
+ .depth_clip_far = 1,
+ .multisample = pfb->samples > 1,
+ };
+ ctx->clear_rs_state[rs_idx] = pctx->create_rasterizer_state(pctx, &tmpl);
+ }
+ pctx->bind_rasterizer_state(pctx, ctx->clear_rs_state[rs_idx]);
+
+ struct pipe_viewport_state vp = {
+ .scale = {0.5f * pfb->width, -0.5f * pfb->height, depth},
+ .translate = {0.5f * pfb->width, 0.5f * pfb->height, 0.0f},
+ };
+ pctx->set_viewport_states(pctx, 0, 1, &vp);
+
+ pctx->bind_vertex_elements_state(pctx, ctx->solid_vbuf_state.vtx);
+ pctx->set_vertex_buffers(pctx, blitter->vb_slot, 1, 0, false,
+ &ctx->solid_vbuf_state.vertexbuf.vb[0]);
+ pctx->set_stream_output_targets(pctx, 0, NULL, NULL);
+
+ if (pfb->layers > 1)
+ pctx->bind_vs_state(pctx, ctx->solid_layered_prog.vs);
+ else
+ pctx->bind_vs_state(pctx, ctx->solid_prog.vs);
+
+ pctx->bind_fs_state(pctx, ctx->solid_prog.fs);
+
+ /* Clear geom/tess shaders, lest the draw emit code think we are
+ * trying to use them:
+ */
+ pctx->bind_gs_state(pctx, NULL);
+ pctx->bind_tcs_state(pctx, NULL);
+ pctx->bind_tes_state(pctx, NULL);
+
+ struct pipe_draw_info info = {
+ .mode = PIPE_PRIM_MAX, /* maps to DI_PT_RECTLIST */
+ .index_bounds_valid = true,
+ .max_index = 1,
+ .instance_count = MAX2(1, pfb->layers),
+ };
+ struct pipe_draw_start_count draw = {
+ .count = 2,
+ };
+ pctx->draw_vbo(pctx, &info, NULL, &draw, 1);
+
+ /* We expect that this should not have triggered a change in pfb: */
+ assert(util_framebuffer_state_equal(pfb, &ctx->framebuffer));
+
+ util_blitter_restore_constant_buffer_state(blitter);
+ util_blitter_restore_vertex_states(blitter);
+ util_blitter_restore_fragment_states(blitter);
+ util_blitter_restore_textures(blitter);
+ util_blitter_restore_fb_state(blitter);
+ util_blitter_restore_render_cond(blitter);
+ util_blitter_unset_running_flag(blitter);
+
+ fd_blitter_pipe_end(ctx);
}
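
The viewport chosen for the clear quad maps the full NDC range exactly onto the framebuffer, with the negative y scale doing the NDC-to-window flip (the z scale slot carries the clear depth). A standalone check of the x/y mapping with illustrative numbers only:

   #include <stdio.h>

   int
   main(void)
   {
      float W = 640, H = 480;
      float scale[2] = {0.5f * W, -0.5f * H};
      float translate[2] = {0.5f * W, 0.5f * H};

      /* window = ndc * scale + translate; the negative y scale flips
       * NDC (+y up) into window coordinates (+y down)
       */
      float corners[2][2] = {{-1.0f, -1.0f}, {1.0f, 1.0f}};
      for (int i = 0; i < 2; i++) {
         float x = corners[i][0] * scale[0] + translate[0];
         float y = corners[i][1] * scale[1] + translate[1];
         printf("ndc(%+.0f,%+.0f) -> window(%.0f,%.0f)\n", corners[i][0],
                corners[i][1], x, y);
      }
      /* prints (0,480) and (640,0): the quad exactly covers the target */
      return 0;
   }
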
/**
bool
fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
- struct fd_context *ctx = fd_context(pctx);
- struct pipe_blit_info info = *blit_info;
+ struct fd_context *ctx = fd_context(pctx);
+ struct pipe_blit_info info = *blit_info;
- if (info.render_condition_enable && !fd_render_condition_check(pctx))
- return true;
+ if (info.render_condition_enable && !fd_render_condition_check(pctx))
+ return true;
- if (ctx->blit && ctx->blit(ctx, &info))
- return true;
+ if (ctx->blit && ctx->blit(ctx, &info))
+ return true;
- if (info.mask & PIPE_MASK_S) {
- DBG("cannot blit stencil, skipping");
- info.mask &= ~PIPE_MASK_S;
- }
+ if (info.mask & PIPE_MASK_S) {
+ DBG("cannot blit stencil, skipping");
+ info.mask &= ~PIPE_MASK_S;
+ }
- if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
- DBG("blit unsupported %s -> %s",
- util_format_short_name(info.src.resource->format),
- util_format_short_name(info.dst.resource->format));
- return false;
- }
+ if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
+ DBG("blit unsupported %s -> %s",
+ util_format_short_name(info.src.resource->format),
+ util_format_short_name(info.dst.resource->format));
+ return false;
+ }
- return fd_blitter_blit(ctx, &info);
+ return fd_blitter_blit(ctx, &info);
}
/**
* _copy_region using pipe (3d engine)
*/
static bool
-fd_blitter_pipe_copy_region(struct fd_context *ctx,
- struct pipe_resource *dst,
- unsigned dst_level,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src,
- unsigned src_level,
- const struct pipe_box *src_box)
- assert_dt
+fd_blitter_pipe_copy_region(struct fd_context *ctx, struct pipe_resource *dst,
+ unsigned dst_level, unsigned dstx, unsigned dsty,
+ unsigned dstz, struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box) assert_dt
{
- /* not until we allow rendertargets to be buffers */
- if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
- return false;
+ /* not until we allow rendertargets to be buffers */
+ if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
+ return false;
- if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
- return false;
+ if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
+ return false;
- /* TODO we could discard if dst box covers dst level fully.. */
- fd_blitter_pipe_begin(ctx, false, false);
- util_blitter_copy_texture(ctx->blitter,
- dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box);
- fd_blitter_pipe_end(ctx);
+ /* TODO we could discard if dst box covers dst level fully.. */
+ fd_blitter_pipe_begin(ctx, false, false);
+ util_blitter_copy_texture(ctx->blitter, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
+ fd_blitter_pipe_end(ctx);
- return true;
+ return true;
}
/**
* The resource must be of the same format.
*/
void
-fd_resource_copy_region(struct pipe_context *pctx,
- struct pipe_resource *dst,
- unsigned dst_level,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src,
- unsigned src_level,
- const struct pipe_box *src_box)
+fd_resource_copy_region(struct pipe_context *pctx, struct pipe_resource *dst,
+ unsigned dst_level, unsigned dstx, unsigned dsty,
+ unsigned dstz, struct pipe_resource *src,
+ unsigned src_level, const struct pipe_box *src_box)
{
- struct fd_context *ctx = fd_context(pctx);
-
- if (ctx->blit) {
- struct pipe_blit_info info;
-
- memset(&info, 0, sizeof info);
- info.dst.resource = dst;
- info.dst.level = dst_level;
- info.dst.box.x = dstx;
- info.dst.box.y = dsty;
- info.dst.box.z = dstz;
- info.dst.box.width = src_box->width;
- info.dst.box.height = src_box->height;
- assert(info.dst.box.width >= 0);
- assert(info.dst.box.height >= 0);
- info.dst.box.depth = 1;
- info.dst.format = dst->format;
- info.src.resource = src;
- info.src.level = src_level;
- info.src.box = *src_box;
- info.src.format = src->format;
- info.mask = util_format_get_mask(src->format);
- info.filter = PIPE_TEX_FILTER_NEAREST;
- info.scissor_enable = 0;
-
- if (ctx->blit(ctx, &info))
- return;
- }
-
- /* TODO if we have 2d core, or other DMA engine that could be used
- * for simple copies and reasonably easily synchronized with the 3d
- * core, this is where we'd plug it in..
- */
-
- /* try blit on 3d pipe: */
- if (fd_blitter_pipe_copy_region(ctx,
- dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box))
- return;
-
- /* else fallback to pure sw: */
- util_resource_copy_region(pctx,
- dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box);
+ struct fd_context *ctx = fd_context(pctx);
+
+ if (ctx->blit) {
+ struct pipe_blit_info info;
+
+ memset(&info, 0, sizeof info);
+ info.dst.resource = dst;
+ info.dst.level = dst_level;
+ info.dst.box.x = dstx;
+ info.dst.box.y = dsty;
+ info.dst.box.z = dstz;
+ info.dst.box.width = src_box->width;
+ info.dst.box.height = src_box->height;
+ assert(info.dst.box.width >= 0);
+ assert(info.dst.box.height >= 0);
+ info.dst.box.depth = 1;
+ info.dst.format = dst->format;
+ info.src.resource = src;
+ info.src.level = src_level;
+ info.src.box = *src_box;
+ info.src.format = src->format;
+ info.mask = util_format_get_mask(src->format);
+ info.filter = PIPE_TEX_FILTER_NEAREST;
+ info.scissor_enable = 0;
+
+ if (ctx->blit(ctx, &info))
+ return;
+ }
+
+ /* TODO if we have a 2d core, or other DMA engine that could be used
+ * for simple copies and reasonably easily synchronized with the 3d
+ * core, this is where we'd plug it in..
+ */
+
+ /* try blit on 3d pipe: */
+ if (fd_blitter_pipe_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, src,
+ src_level, src_box))
+ return;
+
+ /* else fallback to pure sw: */
+ util_resource_copy_region(pctx, dst, dst_level, dstx, dsty, dstz, src,
+ src_level, src_box);
}
#include "freedreno_context.h"
-bool fd_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt;
+bool fd_blitter_blit(struct fd_context *ctx,
+ const struct pipe_blit_info *info) assert_dt;
-void
-fd_blitter_clear(struct pipe_context *pctx, unsigned buffers,
- const union pipe_color_union *color, double depth, unsigned stencil) assert_dt;
+void fd_blitter_clear(struct pipe_context *pctx, unsigned buffers,
+ const union pipe_color_union *color, double depth,
+ unsigned stencil) assert_dt;
void fd_resource_copy_region(struct pipe_context *pctx,
- struct pipe_resource *dst,
- unsigned dst_level,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src,
- unsigned src_level,
- const struct pipe_box *src_box) assert_dt;
-
-bool fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) assert_dt;
+ struct pipe_resource *dst, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box) assert_dt;
+
+bool fd_blit(struct pipe_context *pctx,
+ const struct pipe_blit_info *blit_info) assert_dt;
#endif /* FREEDRENO_BLIT_H_ */
*/
#include "freedreno_context.h"
+#include "ir3/ir3_cache.h"
+#include "util/u_upload_mgr.h"
#include "freedreno_blitter.h"
#include "freedreno_draw.h"
#include "freedreno_fence.h"
-#include "freedreno_program.h"
-#include "freedreno_resource.h"
-#include "freedreno_texture.h"
-#include "freedreno_state.h"
#include "freedreno_gmem.h"
+#include "freedreno_program.h"
#include "freedreno_query.h"
#include "freedreno_query_hw.h"
+#include "freedreno_resource.h"
+#include "freedreno_state.h"
+#include "freedreno_texture.h"
#include "freedreno_util.h"
-#include "ir3/ir3_cache.h"
-#include "util/u_upload_mgr.h"
static void
fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fencep,
- unsigned flags)
- in_dt
+ unsigned flags) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct pipe_fence_handle *fence = NULL;
- struct fd_batch *batch = NULL;
-
- /* We want to lookup current batch if it exists, but not create a new
- * one if not (unless we need a fence)
- */
- fd_batch_reference(&batch, ctx->batch);
-
- DBG("%p: flush: flags=%x", batch, flags);
-
- if (fencep && !batch) {
- batch = fd_context_batch(ctx);
- } else if (!batch) {
- fd_bc_dump(ctx->screen, "%p: NULL batch, remaining:\n", ctx);
- return;
- }
-
- /* With TC_FLUSH_ASYNC, the fence will have been pre-created from
- * the front-end thread. But not yet associated with a batch,
- * because we cannot safely access ctx->batch outside of the driver
- * thread. So instead, replace the existing batch->fence with the
- * one created earlier
- */
- if ((flags & TC_FLUSH_ASYNC) && fencep) {
- /* We don't currently expect async+flush in the fence-fd
- * case.. for that to work properly we'd need TC to tell
- * us in the create_fence callback that it needs an fd.
- */
- assert(!(flags & PIPE_FLUSH_FENCE_FD));
-
- fd_fence_set_batch(*fencep, batch);
- fd_fence_ref(&batch->fence, *fencep);
-
- /* We (a) cannot substitute the provided fence with last_fence,
- * and (b) need fd_fence_populate() to be eventually called on
- * the fence that was pre-created in frontend-thread:
- */
- fd_fence_ref(&ctx->last_fence, NULL);
-
- /* async flush is not compatible with deferred flush, since
- * nothing triggers the batch flush which fence_flush() would
- * be waiting for
- */
- flags &= ~PIPE_FLUSH_DEFERRED;
- }
-
- /* In some sequence of events, we can end up with a last_fence that is
- * not an "fd" fence, which results in eglDupNativeFenceFDANDROID()
- * errors.
- */
- if ((flags & PIPE_FLUSH_FENCE_FD) && ctx->last_fence &&
- !fd_fence_is_fd(ctx->last_fence))
- fd_fence_ref(&ctx->last_fence, NULL);
-
- /* if no rendering since last flush, ie. app just decided it needed
- * a fence, re-use the last one:
- */
- if (ctx->last_fence) {
- fd_fence_ref(&fence, ctx->last_fence);
- fd_bc_dump(ctx->screen, "%p: reuse last_fence, remaining:\n", ctx);
- goto out;
- }
-
- /* Take a ref to the batch's fence (batch can be unref'd when flushed: */
- fd_fence_ref(&fence, batch->fence);
-
- if (flags & PIPE_FLUSH_FENCE_FD)
- batch->needs_out_fence_fd = true;
-
- fd_bc_dump(ctx->screen, "%p: flushing %p<%u>, flags=0x%x, pending:\n",
- ctx, batch, batch->seqno, flags);
-
- if (!ctx->screen->reorder) {
- fd_batch_flush(batch);
- } else if (flags & PIPE_FLUSH_DEFERRED) {
- fd_bc_flush_deferred(&ctx->screen->batch_cache, ctx);
- } else {
- fd_bc_flush(&ctx->screen->batch_cache, ctx);
- }
-
- fd_bc_dump(ctx->screen, "%p: remaining:\n", ctx);
+ struct fd_context *ctx = fd_context(pctx);
+ struct pipe_fence_handle *fence = NULL;
+ struct fd_batch *batch = NULL;
+
+ /* We want to look up the current batch if it exists, but not create a
+ * new one if there isn't one (unless we need a fence)
+ */
+ fd_batch_reference(&batch, ctx->batch);
+
+ DBG("%p: flush: flags=%x", batch, flags);
+
+ if (fencep && !batch) {
+ batch = fd_context_batch(ctx);
+ } else if (!batch) {
+ fd_bc_dump(ctx->screen, "%p: NULL batch, remaining:\n", ctx);
+ return;
+ }
+
+ /* With TC_FLUSH_ASYNC, the fence will have been pre-created from
+ * the front-end thread. But not yet associated with a batch,
+ * because we cannot safely access ctx->batch outside of the driver
+ * thread. So instead, replace the existing batch->fence with the
+ * one created earlier
+ */
+ if ((flags & TC_FLUSH_ASYNC) && fencep) {
+ /* We don't currently expect async+flush in the fence-fd
+ * case.. for that to work properly we'd need TC to tell
+ * us in the create_fence callback that it needs an fd.
+ */
+ assert(!(flags & PIPE_FLUSH_FENCE_FD));
+
+ fd_fence_set_batch(*fencep, batch);
+ fd_fence_ref(&batch->fence, *fencep);
+
+ /* We (a) cannot substitute the provided fence with last_fence,
+ * and (b) need fd_fence_populate() to be eventually called on
+ * the fence that was pre-created in frontend-thread:
+ */
+ fd_fence_ref(&ctx->last_fence, NULL);
+
+ /* async flush is not compatible with deferred flush, since
+ * nothing triggers the batch flush which fence_flush() would
+ * be waiting for
+ */
+ flags &= ~PIPE_FLUSH_DEFERRED;
+ }
+
+ /* In some sequence of events, we can end up with a last_fence that is
+ * not an "fd" fence, which results in eglDupNativeFenceFDANDROID()
+ * errors.
+ */
+ if ((flags & PIPE_FLUSH_FENCE_FD) && ctx->last_fence &&
+ !fd_fence_is_fd(ctx->last_fence))
+ fd_fence_ref(&ctx->last_fence, NULL);
+
+ /* if no rendering since last flush, ie. app just decided it needed
+ * a fence, re-use the last one:
+ */
+ if (ctx->last_fence) {
+ fd_fence_ref(&fence, ctx->last_fence);
+ fd_bc_dump(ctx->screen, "%p: reuse last_fence, remaining:\n", ctx);
+ goto out;
+ }
+
+ /* Take a ref to the batch's fence (batch can be unref'd when flushed: */
+ fd_fence_ref(&fence, batch->fence);
+
+ if (flags & PIPE_FLUSH_FENCE_FD)
+ batch->needs_out_fence_fd = true;
+
+ fd_bc_dump(ctx->screen, "%p: flushing %p<%u>, flags=0x%x, pending:\n", ctx,
+ batch, batch->seqno, flags);
+
+ if (!ctx->screen->reorder) {
+ fd_batch_flush(batch);
+ } else if (flags & PIPE_FLUSH_DEFERRED) {
+ fd_bc_flush_deferred(&ctx->screen->batch_cache, ctx);
+ } else {
+ fd_bc_flush(&ctx->screen->batch_cache, ctx);
+ }
+
+ fd_bc_dump(ctx->screen, "%p: remaining:\n", ctx);
out:
- if (fencep)
- fd_fence_ref(fencep, fence);
+ if (fencep)
+ fd_fence_ref(fencep, fence);
- fd_fence_ref(&ctx->last_fence, fence);
+ fd_fence_ref(&ctx->last_fence, fence);
- fd_fence_ref(&fence, NULL);
+ fd_fence_ref(&fence, NULL);
- fd_batch_reference(&batch, NULL);
+ fd_batch_reference(&batch, NULL);
- u_trace_context_process(&ctx->trace_context,
- !!(flags & PIPE_FLUSH_END_OF_FRAME));
+ u_trace_context_process(&ctx->trace_context,
+ !!(flags & PIPE_FLUSH_END_OF_FRAME));
}
static void
-fd_texture_barrier(struct pipe_context *pctx, unsigned flags)
- in_dt
+fd_texture_barrier(struct pipe_context *pctx, unsigned flags) in_dt
{
- if (flags == PIPE_TEXTURE_BARRIER_FRAMEBUFFER) {
- struct fd_context *ctx = fd_context(pctx);
-
- if (ctx->framebuffer_barrier) {
- ctx->framebuffer_barrier(ctx);
- return;
- }
- }
-
- /* On devices that could sample from GMEM we could possibly do better.
- * Or if we knew that we were doing GMEM bypass we could just emit a
- * cache flush, perhaps? But we don't know if future draws would cause
- * us to use GMEM, and a flush in bypass isn't the end of the world.
- */
- fd_context_flush(pctx, NULL, 0);
+ if (flags == PIPE_TEXTURE_BARRIER_FRAMEBUFFER) {
+ struct fd_context *ctx = fd_context(pctx);
+
+ if (ctx->framebuffer_barrier) {
+ ctx->framebuffer_barrier(ctx);
+ return;
+ }
+ }
+
+ /* On devices that could sample from GMEM we could possibly do better.
+ * Or if we knew that we were doing GMEM bypass we could just emit a
+ * cache flush, perhaps? But we don't know if future draws would cause
+ * us to use GMEM, and a flush in bypass isn't the end of the world.
+ */
+ fd_context_flush(pctx, NULL, 0);
}
static void
fd_memory_barrier(struct pipe_context *pctx, unsigned flags)
{
- if (!(flags & ~PIPE_BARRIER_UPDATE))
- return;
+ if (!(flags & ~PIPE_BARRIER_UPDATE))
+ return;
- fd_context_flush(pctx, NULL, 0);
- /* TODO do we need to check for persistently mapped buffers and fd_bo_cpu_prep()?? */
+ fd_context_flush(pctx, NULL, 0);
+ /* TODO do we need to check for persistently mapped buffers and
+ * fd_bo_cpu_prep()?? */
}
static void
emit_string_tail(struct fd_ringbuffer *ring, const char *string, int len)
{
- const uint32_t *buf = (const void *)string;
-
- while (len >= 4) {
- OUT_RING(ring, *buf);
- buf++;
- len -= 4;
- }
-
- /* copy remainder bytes without reading past end of input string: */
- if (len > 0) {
- uint32_t w = 0;
- memcpy(&w, buf, len);
- OUT_RING(ring, w);
- }
+ const uint32_t *buf = (const void *)string;
+
+ while (len >= 4) {
+ OUT_RING(ring, *buf);
+ buf++;
+ len -= 4;
+ }
+
+ /* copy remainder bytes without reading past end of input string: */
+ if (len > 0) {
+ uint32_t w = 0;
+ memcpy(&w, buf, len);
+ OUT_RING(ring, w);
+ }
}
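The remainder copy above is the interesting part: the string payload is packed into dwords without ever reading past the end of the input. A standalone sketch of the same technique, with print_dword standing in for OUT_RING:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
print_dword(uint32_t w)
{
   printf("0x%08x\n", w);
}

static void
pack_string(const char *string, int len)
{
   const uint32_t *buf = (const void *)string;

   /* whole dwords first: */
   while (len >= 4) {
      print_dword(*buf++);
      len -= 4;
   }

   /* memcpy() copies only the remaining bytes; the rest of the last
    * dword stays zero:
    */
   if (len > 0) {
      uint32_t w = 0;
      memcpy(&w, buf, len);
      print_dword(w);
   }
}

int
main(void)
{
   pack_string("marker", 6);   /* two dwords, the last padded with zeros */
   return 0;
}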
/* for prior to a5xx: */
void
-fd_emit_string(struct fd_ringbuffer *ring,
- const char *string, int len)
+fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len)
{
- /* max packet size is 0x3fff+1 dwords: */
- len = MIN2(len, 0x4000 * 4);
+ /* max packet size is 0x3fff+1 dwords: */
+ len = MIN2(len, 0x4000 * 4);
- OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
- emit_string_tail(ring, string, len);
+ OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
+ emit_string_tail(ring, string, len);
}
/* for a5xx+ */
void
-fd_emit_string5(struct fd_ringbuffer *ring,
- const char *string, int len)
+fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len)
{
- /* max packet size is 0x3fff dwords: */
- len = MIN2(len, 0x3fff * 4);
+ /* max packet size is 0x3fff dwords: */
+ len = MIN2(len, 0x3fff * 4);
- OUT_PKT7(ring, CP_NOP, align(len, 4) / 4);
- emit_string_tail(ring, string, len);
+ OUT_PKT7(ring, CP_NOP, align(len, 4) / 4);
+ emit_string_tail(ring, string, len);
}
/**
 * emit marker string as payload of a no-op packet, which can be
 * decoded by cffdump.
 */
static void
-fd_emit_string_marker(struct pipe_context *pctx, const char *string, int len)
- in_dt
+fd_emit_string_marker(struct pipe_context *pctx, const char *string,
+ int len) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- if (!ctx->batch)
- return;
+ if (!ctx->batch)
+ return;
- struct fd_batch *batch = fd_context_batch_locked(ctx);
+ struct fd_batch *batch = fd_context_batch_locked(ctx);
- ctx->batch->needs_flush = true;
+ ctx->batch->needs_flush = true;
- if (ctx->screen->gpu_id >= 500) {
- fd_emit_string5(batch->draw, string, len);
- } else {
- fd_emit_string(batch->draw, string, len);
- }
+ if (ctx->screen->gpu_id >= 500) {
+ fd_emit_string5(batch->draw, string, len);
+ } else {
+ fd_emit_string(batch->draw, string, len);
+ }
- fd_batch_unlock_submit(batch);
- fd_batch_reference(&batch, NULL);
+ fd_batch_unlock_submit(batch);
+ fd_batch_reference(&batch, NULL);
}
/**
 * Flush the current batch, if it is holding an in-fence, so the fence is
 * consumed before we switch away from this context.
 */
void
fd_context_switch_from(struct fd_context *ctx)
{
- if (ctx->batch && (ctx->batch->in_fence_fd != -1))
- fd_batch_flush(ctx->batch);
+ if (ctx->batch && (ctx->batch->in_fence_fd != -1))
+ fd_batch_flush(ctx->batch);
}
/**
 * Transfer the context's pending in-fence (from fence_server_sync()) over
 * to the batch we are switching to.
 */
void
fd_context_switch_to(struct fd_context *ctx, struct fd_batch *batch)
{
- if (ctx->in_fence_fd != -1) {
- sync_accumulate("freedreno", &batch->in_fence_fd, ctx->in_fence_fd);
- close(ctx->in_fence_fd);
- ctx->in_fence_fd = -1;
- }
+ if (ctx->in_fence_fd != -1) {
+ sync_accumulate("freedreno", &batch->in_fence_fd, ctx->in_fence_fd);
+ close(ctx->in_fence_fd);
+ ctx->in_fence_fd = -1;
+ }
}
/**
 * Return a reference to the current batch, creating it if it does not yet
 * exist.  Caller must unref.
 */
struct fd_batch *
fd_context_batch(struct fd_context *ctx)
{
- struct fd_batch *batch = NULL;
+ struct fd_batch *batch = NULL;
- tc_assert_driver_thread(ctx->tc);
+ tc_assert_driver_thread(ctx->tc);
- fd_batch_reference(&batch, ctx->batch);
+ fd_batch_reference(&batch, ctx->batch);
- if (unlikely(!batch)) {
- batch = fd_batch_from_fb(&ctx->screen->batch_cache, ctx, &ctx->framebuffer);
- util_copy_framebuffer_state(&batch->framebuffer, &ctx->framebuffer);
- fd_batch_reference(&ctx->batch, batch);
- fd_context_all_dirty(ctx);
- }
- fd_context_switch_to(ctx, batch);
+ if (unlikely(!batch)) {
+ batch =
+ fd_batch_from_fb(&ctx->screen->batch_cache, ctx, &ctx->framebuffer);
+ util_copy_framebuffer_state(&batch->framebuffer, &ctx->framebuffer);
+ fd_batch_reference(&ctx->batch, batch);
+ fd_context_all_dirty(ctx);
+ }
+ fd_context_switch_to(ctx, batch);
- return batch;
+ return batch;
}
/**
 * Return a reference to the current batch with the submit lock held,
 * retrying the lookup until the lock is acquired.
 */
struct fd_batch *
fd_context_batch_locked(struct fd_context *ctx)
{
- struct fd_batch *batch = NULL;
+ struct fd_batch *batch = NULL;
- while (!batch) {
- batch = fd_context_batch(ctx);
- if (!fd_batch_lock_submit(batch)) {
- fd_batch_reference(&batch, NULL);
- }
- }
+ while (!batch) {
+ batch = fd_context_batch(ctx);
+ if (!fd_batch_lock_submit(batch)) {
+ fd_batch_reference(&batch, NULL);
+ }
+ }
- return batch;
+ return batch;
}
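The loop above re-fetches the batch until the submit lock is taken, since a batch can be flushed (and replaced) between lookup and lock. The same acquire-or-retry shape in standalone form, with mock stand-ins for fd_context_batch() and fd_batch_lock_submit():

#include <stdbool.h>
#include <stdio.h>

static int lookups;

static int *
mock_get_batch(void)
{
   static int batch;
   lookups++;
   return &batch;
}

static bool
mock_trylock(int *batch)
{
   (void)batch;
   return lookups >= 2;   /* fail the first attempt to show the retry */
}

int
main(void)
{
   int *batch = NULL;

   while (!batch) {
      batch = mock_get_batch();
      if (!mock_trylock(batch))
         batch = NULL;   /* the driver also drops its reference here */
   }
   printf("locked after %d lookups\n", lookups);
   return 0;
}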
void
fd_context_destroy(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
- unsigned i;
+ struct fd_context *ctx = fd_context(pctx);
+ unsigned i;
- DBG("");
+ DBG("");
- fd_screen_lock(ctx->screen);
- list_del(&ctx->node);
- fd_screen_unlock(ctx->screen);
+ fd_screen_lock(ctx->screen);
+ list_del(&ctx->node);
+ fd_screen_unlock(ctx->screen);
- fd_fence_ref(&ctx->last_fence, NULL);
+ fd_fence_ref(&ctx->last_fence, NULL);
- if (ctx->in_fence_fd != -1)
- close(ctx->in_fence_fd);
+ if (ctx->in_fence_fd != -1)
+ close(ctx->in_fence_fd);
- for (i = 0; i < ARRAY_SIZE(ctx->pvtmem); i++) {
- if (ctx->pvtmem[i].bo)
- fd_bo_del(ctx->pvtmem[i].bo);
- }
+ for (i = 0; i < ARRAY_SIZE(ctx->pvtmem); i++) {
+ if (ctx->pvtmem[i].bo)
+ fd_bo_del(ctx->pvtmem[i].bo);
+ }
- util_copy_framebuffer_state(&ctx->framebuffer, NULL);
- fd_batch_reference(&ctx->batch, NULL); /* unref current batch */
- fd_bc_invalidate_context(ctx);
+ util_copy_framebuffer_state(&ctx->framebuffer, NULL);
+ fd_batch_reference(&ctx->batch, NULL); /* unref current batch */
+ fd_bc_invalidate_context(ctx);
- fd_prog_fini(pctx);
+ fd_prog_fini(pctx);
- if (ctx->blitter)
- util_blitter_destroy(ctx->blitter);
+ if (ctx->blitter)
+ util_blitter_destroy(ctx->blitter);
- if (pctx->stream_uploader)
- u_upload_destroy(pctx->stream_uploader);
+ if (pctx->stream_uploader)
+ u_upload_destroy(pctx->stream_uploader);
- for (i = 0; i < ARRAY_SIZE(ctx->clear_rs_state); i++)
- if (ctx->clear_rs_state[i])
- pctx->delete_rasterizer_state(pctx, ctx->clear_rs_state[i]);
+ for (i = 0; i < ARRAY_SIZE(ctx->clear_rs_state); i++)
+ if (ctx->clear_rs_state[i])
+ pctx->delete_rasterizer_state(pctx, ctx->clear_rs_state[i]);
- if (ctx->primconvert)
- util_primconvert_destroy(ctx->primconvert);
+ if (ctx->primconvert)
+ util_primconvert_destroy(ctx->primconvert);
- slab_destroy_child(&ctx->transfer_pool);
- slab_destroy_child(&ctx->transfer_pool_unsync);
+ slab_destroy_child(&ctx->transfer_pool);
+ slab_destroy_child(&ctx->transfer_pool_unsync);
- for (i = 0; i < ARRAY_SIZE(ctx->vsc_pipe_bo); i++) {
- if (!ctx->vsc_pipe_bo[i])
- break;
- fd_bo_del(ctx->vsc_pipe_bo[i]);
- }
+ for (i = 0; i < ARRAY_SIZE(ctx->vsc_pipe_bo); i++) {
+ if (!ctx->vsc_pipe_bo[i])
+ break;
+ fd_bo_del(ctx->vsc_pipe_bo[i]);
+ }
- fd_device_del(ctx->dev);
- fd_pipe_del(ctx->pipe);
+ fd_device_del(ctx->dev);
+ fd_pipe_del(ctx->pipe);
- simple_mtx_destroy(&ctx->gmem_lock);
+ simple_mtx_destroy(&ctx->gmem_lock);
- u_trace_context_fini(&ctx->trace_context);
+ u_trace_context_fini(&ctx->trace_context);
- fd_autotune_fini(&ctx->autotune);
+ fd_autotune_fini(&ctx->autotune);
- ir3_cache_destroy(ctx->shader_cache);
+ ir3_cache_destroy(ctx->shader_cache);
- if (FD_DBG(BSTAT) || FD_DBG(MSGS)) {
- mesa_logi("batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_nondraw=%u, batch_restore=%u\n",
- (uint32_t)ctx->stats.batch_total, (uint32_t)ctx->stats.batch_sysmem,
- (uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_nondraw,
- (uint32_t)ctx->stats.batch_restore);
- }
+ if (FD_DBG(BSTAT) || FD_DBG(MSGS)) {
+ mesa_logi(
+ "batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_nondraw=%u, "
+ "batch_restore=%u\n",
+ (uint32_t)ctx->stats.batch_total, (uint32_t)ctx->stats.batch_sysmem,
+ (uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_nondraw,
+ (uint32_t)ctx->stats.batch_restore);
+ }
}
static void
fd_set_debug_callback(struct pipe_context *pctx,
- const struct pipe_debug_callback *cb)
+ const struct pipe_debug_callback *cb)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- if (cb)
- ctx->debug = *cb;
- else
- memset(&ctx->debug, 0, sizeof(ctx->debug));
+ if (cb)
+ ctx->debug = *cb;
+ else
+ memset(&ctx->debug, 0, sizeof(ctx->debug));
}
static uint32_t
fd_get_reset_count(struct fd_context *ctx, bool per_context)
{
- uint64_t val;
- enum fd_param_id param =
- per_context ? FD_CTX_FAULTS : FD_GLOBAL_FAULTS;
- int ret = fd_pipe_get_param(ctx->pipe, param, &val);
- debug_assert(!ret);
- return val;
+ uint64_t val;
+ enum fd_param_id param = per_context ? FD_CTX_FAULTS : FD_GLOBAL_FAULTS;
+ int ret = fd_pipe_get_param(ctx->pipe, param, &val);
+ debug_assert(!ret);
+ return val;
}
static enum pipe_reset_status
fd_get_device_reset_status(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
- int context_faults = fd_get_reset_count(ctx, true);
- int global_faults = fd_get_reset_count(ctx, false);
- enum pipe_reset_status status;
-
- /* Not called in driver thread, but threaded_context syncs
- * before calling this:
- */
- fd_context_access_begin(ctx);
-
- if (context_faults != ctx->context_reset_count) {
- status = PIPE_GUILTY_CONTEXT_RESET;
- } else if (global_faults != ctx->global_reset_count) {
- status = PIPE_INNOCENT_CONTEXT_RESET;
- } else {
- status = PIPE_NO_RESET;
- }
-
- ctx->context_reset_count = context_faults;
- ctx->global_reset_count = global_faults;
-
- fd_context_access_end(ctx);
-
- return status;
+ struct fd_context *ctx = fd_context(pctx);
+ int context_faults = fd_get_reset_count(ctx, true);
+ int global_faults = fd_get_reset_count(ctx, false);
+ enum pipe_reset_status status;
+
+ /* Not called in driver thread, but threaded_context syncs
+ * before calling this:
+ */
+ fd_context_access_begin(ctx);
+
+ if (context_faults != ctx->context_reset_count) {
+ status = PIPE_GUILTY_CONTEXT_RESET;
+ } else if (global_faults != ctx->global_reset_count) {
+ status = PIPE_INNOCENT_CONTEXT_RESET;
+ } else {
+ status = PIPE_NO_RESET;
+ }
+
+ ctx->context_reset_count = context_faults;
+ ctx->global_reset_count = global_faults;
+
+ fd_context_access_end(ctx);
+
+ return status;
}
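The guilty/innocent classification above is just a comparison of the fault counters against the values cached at context creation. A standalone sketch of the same decision (names are illustrative):

#include <stdio.h>

enum reset_status { NO_RESET, GUILTY, INNOCENT };

/* classify exactly as above: a per-context fault count bump means we
 * crashed the GPU; a global-only bump means some other context did:
 */
static enum reset_status
classify(unsigned ctx_faults, unsigned global_faults,
         unsigned *cached_ctx, unsigned *cached_global)
{
   enum reset_status status = NO_RESET;

   if (ctx_faults != *cached_ctx)
      status = GUILTY;
   else if (global_faults != *cached_global)
      status = INNOCENT;

   *cached_ctx = ctx_faults;
   *cached_global = global_faults;
   return status;
}

int
main(void)
{
   unsigned cached_ctx = 0, cached_global = 0;

   printf("%d\n", classify(0, 1, &cached_ctx, &cached_global)); /* INNOCENT */
   printf("%d\n", classify(1, 1, &cached_ctx, &cached_global)); /* GUILTY */
   printf("%d\n", classify(1, 1, &cached_ctx, &cached_global)); /* NO_RESET */
   return 0;
}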
static void
fd_trace_record_ts(struct u_trace *ut, struct pipe_resource *timestamps,
- unsigned idx)
+ unsigned idx)
{
- struct fd_batch *batch = container_of(ut, struct fd_batch, trace);
- struct fd_ringbuffer *ring = batch->nondraw ? batch->draw : batch->gmem;
-
- if (ring->cur == batch->last_timestamp_cmd) {
- uint64_t *ts = fd_bo_map(fd_resource(timestamps)->bo);
- ts[idx] = U_TRACE_NO_TIMESTAMP;
- return;
- }
-
- unsigned ts_offset = idx * sizeof(uint64_t);
- batch->ctx->record_timestamp(ring, fd_resource(timestamps)->bo, ts_offset);
- batch->last_timestamp_cmd = ring->cur;
+ struct fd_batch *batch = container_of(ut, struct fd_batch, trace);
+ struct fd_ringbuffer *ring = batch->nondraw ? batch->draw : batch->gmem;
+
+ if (ring->cur == batch->last_timestamp_cmd) {
+ uint64_t *ts = fd_bo_map(fd_resource(timestamps)->bo);
+ ts[idx] = U_TRACE_NO_TIMESTAMP;
+ return;
+ }
+
+ unsigned ts_offset = idx * sizeof(uint64_t);
+ batch->ctx->record_timestamp(ring, fd_resource(timestamps)->bo, ts_offset);
+ batch->last_timestamp_cmd = ring->cur;
}
static uint64_t
fd_trace_read_ts(struct u_trace_context *utctx,
- struct pipe_resource *timestamps, unsigned idx)
+ struct pipe_resource *timestamps, unsigned idx)
{
- struct fd_context *ctx = container_of(utctx, struct fd_context, trace_context);
- struct fd_bo *ts_bo = fd_resource(timestamps)->bo;
+ struct fd_context *ctx =
+ container_of(utctx, struct fd_context, trace_context);
+ struct fd_bo *ts_bo = fd_resource(timestamps)->bo;
- /* Only need to stall on results for the first entry: */
- if (idx == 0) {
- int ret = fd_bo_cpu_prep(ts_bo, ctx->pipe, DRM_FREEDRENO_PREP_READ);
- if (ret)
- return U_TRACE_NO_TIMESTAMP;
- }
+ /* Only need to stall on results for the first entry: */
+ if (idx == 0) {
+ int ret = fd_bo_cpu_prep(ts_bo, ctx->pipe, DRM_FREEDRENO_PREP_READ);
+ if (ret)
+ return U_TRACE_NO_TIMESTAMP;
+ }
- uint64_t *ts = fd_bo_map(ts_bo);
+ uint64_t *ts = fd_bo_map(ts_bo);
- /* Don't translate the no-timestamp marker: */
- if (ts[idx] == U_TRACE_NO_TIMESTAMP)
- return U_TRACE_NO_TIMESTAMP;
+ /* Don't translate the no-timestamp marker: */
+ if (ts[idx] == U_TRACE_NO_TIMESTAMP)
+ return U_TRACE_NO_TIMESTAMP;
- return ctx->ts_to_ns(ts[idx]);
+ return ctx->ts_to_ns(ts[idx]);
}
/* TODO we could combine a few of these small buffers (solid_vbuf,
 * blit_texcoord_vbuf) into a single buffer and save a bit of memory
 */
static struct pipe_resource *
create_solid_vertexbuf(struct pipe_context *pctx)
{
- static const float init_shader_const[] = {
- -1.000000, +1.000000, +1.000000,
- +1.000000, -1.000000, +1.000000,
- };
- struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
- PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
- pipe_buffer_write(pctx, prsc, 0,
- sizeof(init_shader_const), init_shader_const);
- return prsc;
+ static const float init_shader_const[] = {
+ -1.000000, +1.000000, +1.000000, +1.000000, -1.000000, +1.000000,
+ };
+ struct pipe_resource *prsc =
+ pipe_buffer_create(pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
+ sizeof(init_shader_const));
+ pipe_buffer_write(pctx, prsc, 0, sizeof(init_shader_const),
+ init_shader_const);
+ return prsc;
}
static struct pipe_resource *
create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
{
- struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
- PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
- return prsc;
+ struct pipe_resource *prsc = pipe_buffer_create(
+ pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
+ return prsc;
}
void
fd_context_setup_common_vbos(struct fd_context *ctx)
{
- struct pipe_context *pctx = &ctx->base;
-
- ctx->solid_vbuf = create_solid_vertexbuf(pctx);
- ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
-
- /* setup solid_vbuf_state: */
- ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
- pctx, 1, (struct pipe_vertex_element[]){{
- .vertex_buffer_index = 0,
- .src_offset = 0,
- .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
- }});
- ctx->solid_vbuf_state.vertexbuf.count = 1;
- ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
- ctx->solid_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->solid_vbuf;
-
- /* setup blit_vbuf_state: */
- ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
- pctx, 2, (struct pipe_vertex_element[]){{
- .vertex_buffer_index = 0,
- .src_offset = 0,
- .src_format = PIPE_FORMAT_R32G32_FLOAT,
- }, {
- .vertex_buffer_index = 1,
- .src_offset = 0,
- .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
- }});
- ctx->blit_vbuf_state.vertexbuf.count = 2;
- ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
- ctx->blit_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->blit_texcoord_vbuf;
- ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
- ctx->blit_vbuf_state.vertexbuf.vb[1].buffer.resource = ctx->solid_vbuf;
+ struct pipe_context *pctx = &ctx->base;
+
+ ctx->solid_vbuf = create_solid_vertexbuf(pctx);
+ ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
+
+ /* setup solid_vbuf_state: */
+ ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
+ pctx, 1,
+ (struct pipe_vertex_element[]){{
+ .vertex_buffer_index = 0,
+ .src_offset = 0,
+ .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }});
+ ctx->solid_vbuf_state.vertexbuf.count = 1;
+ ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
+ ctx->solid_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->solid_vbuf;
+
+ /* setup blit_vbuf_state: */
+ ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
+ pctx, 2,
+ (struct pipe_vertex_element[]){
+ {
+ .vertex_buffer_index = 0,
+ .src_offset = 0,
+ .src_format = PIPE_FORMAT_R32G32_FLOAT,
+ },
+ {
+ .vertex_buffer_index = 1,
+ .src_offset = 0,
+ .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }});
+ ctx->blit_vbuf_state.vertexbuf.count = 2;
+ ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
+ ctx->blit_vbuf_state.vertexbuf.vb[0].buffer.resource =
+ ctx->blit_texcoord_vbuf;
+ ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
+ ctx->blit_vbuf_state.vertexbuf.vb[1].buffer.resource = ctx->solid_vbuf;
}
void
fd_context_cleanup_common_vbos(struct fd_context *ctx)
{
- struct pipe_context *pctx = &ctx->base;
+ struct pipe_context *pctx = &ctx->base;
- pctx->delete_vertex_elements_state(pctx, ctx->solid_vbuf_state.vtx);
- pctx->delete_vertex_elements_state(pctx, ctx->blit_vbuf_state.vtx);
+ pctx->delete_vertex_elements_state(pctx, ctx->solid_vbuf_state.vtx);
+ pctx->delete_vertex_elements_state(pctx, ctx->blit_vbuf_state.vtx);
- pipe_resource_reference(&ctx->solid_vbuf, NULL);
- pipe_resource_reference(&ctx->blit_texcoord_vbuf, NULL);
+ pipe_resource_reference(&ctx->solid_vbuf, NULL);
+ pipe_resource_reference(&ctx->blit_texcoord_vbuf, NULL);
}
struct pipe_context *
fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
- const uint8_t *primtypes, void *priv, unsigned flags)
- disable_thread_safety_analysis
+ const uint8_t *primtypes, void *priv,
+ unsigned flags) disable_thread_safety_analysis
{
- struct fd_screen *screen = fd_screen(pscreen);
- struct pipe_context *pctx;
- unsigned prio = 1;
- int i;
-
- /* lower numerical value == higher priority: */
- if (FD_DBG(HIPRIO))
- prio = 0;
- else if (flags & PIPE_CONTEXT_HIGH_PRIORITY)
- prio = 0;
- else if (flags & PIPE_CONTEXT_LOW_PRIORITY)
- prio = 2;
-
- /* Some of the stats will get printed out at context destroy, so
- * make sure they are collected:
- */
- if (FD_DBG(BSTAT) || FD_DBG(MSGS))
- ctx->stats_users++;
-
- ctx->screen = screen;
- ctx->pipe = fd_pipe_new2(screen->dev, FD_PIPE_3D, prio);
-
- ctx->in_fence_fd = -1;
-
- if (fd_device_version(screen->dev) >= FD_VERSION_ROBUSTNESS) {
- ctx->context_reset_count = fd_get_reset_count(ctx, true);
- ctx->global_reset_count = fd_get_reset_count(ctx, false);
- }
-
- ctx->primtypes = primtypes;
- ctx->primtype_mask = 0;
- for (i = 0; i <= PIPE_PRIM_MAX; i++)
- if (primtypes[i])
- ctx->primtype_mask |= (1 << i);
-
- simple_mtx_init(&ctx->gmem_lock, mtx_plain);
-
- /* need some sane default in case gallium frontends don't
- * set some state:
- */
- ctx->sample_mask = 0xffff;
- ctx->active_queries = true;
-
- pctx = &ctx->base;
- pctx->screen = pscreen;
- pctx->priv = priv;
- pctx->flush = fd_context_flush;
- pctx->emit_string_marker = fd_emit_string_marker;
- pctx->set_debug_callback = fd_set_debug_callback;
- pctx->get_device_reset_status = fd_get_device_reset_status;
- pctx->create_fence_fd = fd_create_fence_fd;
- pctx->fence_server_sync = fd_fence_server_sync;
- pctx->fence_server_signal = fd_fence_server_signal;
- pctx->texture_barrier = fd_texture_barrier;
- pctx->memory_barrier = fd_memory_barrier;
-
- pctx->stream_uploader = u_upload_create_default(pctx);
- if (!pctx->stream_uploader)
- goto fail;
- pctx->const_uploader = pctx->stream_uploader;
-
- slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
- slab_create_child(&ctx->transfer_pool_unsync, &screen->transfer_pool);
-
- fd_draw_init(pctx);
- fd_resource_context_init(pctx);
- fd_query_context_init(pctx);
- fd_texture_init(pctx);
- fd_state_init(pctx);
-
- ctx->blitter = util_blitter_create(pctx);
- if (!ctx->blitter)
- goto fail;
-
- ctx->primconvert = util_primconvert_create(pctx, ctx->primtype_mask);
- if (!ctx->primconvert)
- goto fail;
-
- list_inithead(&ctx->hw_active_queries);
- list_inithead(&ctx->acc_active_queries);
-
- fd_screen_lock(ctx->screen);
- ctx->seqno = ++screen->ctx_seqno;
- list_add(&ctx->node, &ctx->screen->context_list);
- fd_screen_unlock(ctx->screen);
-
- ctx->current_scissor = &ctx->disabled_scissor;
-
- u_trace_context_init(&ctx->trace_context, pctx,
- fd_trace_record_ts, fd_trace_read_ts);
-
- fd_autotune_init(&ctx->autotune, screen->dev);
-
- return pctx;
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct pipe_context *pctx;
+ unsigned prio = 1;
+ int i;
+
+ /* lower numerical value == higher priority: */
+ if (FD_DBG(HIPRIO))
+ prio = 0;
+ else if (flags & PIPE_CONTEXT_HIGH_PRIORITY)
+ prio = 0;
+ else if (flags & PIPE_CONTEXT_LOW_PRIORITY)
+ prio = 2;
+
+ /* Some of the stats will get printed out at context destroy, so
+ * make sure they are collected:
+ */
+ if (FD_DBG(BSTAT) || FD_DBG(MSGS))
+ ctx->stats_users++;
+
+ ctx->screen = screen;
+ ctx->pipe = fd_pipe_new2(screen->dev, FD_PIPE_3D, prio);
+
+ ctx->in_fence_fd = -1;
+
+ if (fd_device_version(screen->dev) >= FD_VERSION_ROBUSTNESS) {
+ ctx->context_reset_count = fd_get_reset_count(ctx, true);
+ ctx->global_reset_count = fd_get_reset_count(ctx, false);
+ }
+
+ ctx->primtypes = primtypes;
+ ctx->primtype_mask = 0;
+ for (i = 0; i <= PIPE_PRIM_MAX; i++)
+ if (primtypes[i])
+ ctx->primtype_mask |= (1 << i);
+
+ simple_mtx_init(&ctx->gmem_lock, mtx_plain);
+
+ /* need some sane default in case gallium frontends don't
+ * set some state:
+ */
+ ctx->sample_mask = 0xffff;
+ ctx->active_queries = true;
+
+ pctx = &ctx->base;
+ pctx->screen = pscreen;
+ pctx->priv = priv;
+ pctx->flush = fd_context_flush;
+ pctx->emit_string_marker = fd_emit_string_marker;
+ pctx->set_debug_callback = fd_set_debug_callback;
+ pctx->get_device_reset_status = fd_get_device_reset_status;
+ pctx->create_fence_fd = fd_create_fence_fd;
+ pctx->fence_server_sync = fd_fence_server_sync;
+ pctx->fence_server_signal = fd_fence_server_signal;
+ pctx->texture_barrier = fd_texture_barrier;
+ pctx->memory_barrier = fd_memory_barrier;
+
+ pctx->stream_uploader = u_upload_create_default(pctx);
+ if (!pctx->stream_uploader)
+ goto fail;
+ pctx->const_uploader = pctx->stream_uploader;
+
+ slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
+ slab_create_child(&ctx->transfer_pool_unsync, &screen->transfer_pool);
+
+ fd_draw_init(pctx);
+ fd_resource_context_init(pctx);
+ fd_query_context_init(pctx);
+ fd_texture_init(pctx);
+ fd_state_init(pctx);
+
+ ctx->blitter = util_blitter_create(pctx);
+ if (!ctx->blitter)
+ goto fail;
+
+ ctx->primconvert = util_primconvert_create(pctx, ctx->primtype_mask);
+ if (!ctx->primconvert)
+ goto fail;
+
+ list_inithead(&ctx->hw_active_queries);
+ list_inithead(&ctx->acc_active_queries);
+
+ fd_screen_lock(ctx->screen);
+ ctx->seqno = ++screen->ctx_seqno;
+ list_add(&ctx->node, &ctx->screen->context_list);
+ fd_screen_unlock(ctx->screen);
+
+ ctx->current_scissor = &ctx->disabled_scissor;
+
+ u_trace_context_init(&ctx->trace_context, pctx, fd_trace_record_ts,
+ fd_trace_read_ts);
+
+ fd_autotune_init(&ctx->autotune, screen->dev);
+
+ return pctx;
fail:
- pctx->destroy(pctx);
- return NULL;
+ pctx->destroy(pctx);
+ return NULL;
}
struct pipe_context *
fd_context_init_tc(struct pipe_context *pctx, unsigned flags)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- if (!(flags & PIPE_CONTEXT_PREFER_THREADED))
- return pctx;
+ if (!(flags & PIPE_CONTEXT_PREFER_THREADED))
+ return pctx;
- /* Clover (compute-only) is unsupported. */
- if (flags & PIPE_CONTEXT_COMPUTE_ONLY)
- return pctx;
+ /* Clover (compute-only) is unsupported. */
+ if (flags & PIPE_CONTEXT_COMPUTE_ONLY)
+ return pctx;
- struct pipe_context *tc = threaded_context_create(pctx,
- &ctx->screen->transfer_pool,
- fd_replace_buffer_storage,
- fd_fence_create_unflushed,
- &ctx->tc);
+ struct pipe_context *tc = threaded_context_create(
+ pctx, &ctx->screen->transfer_pool, fd_replace_buffer_storage,
+ fd_fence_create_unflushed, &ctx->tc);
- uint64_t total_ram;
- if (tc && tc != pctx && os_get_total_physical_memory(&total_ram)) {
- ((struct threaded_context *) tc)->bytes_mapped_limit = total_ram / 16;
- }
+ uint64_t total_ram;
+ if (tc && tc != pctx && os_get_total_physical_memory(&total_ram)) {
+ ((struct threaded_context *)tc)->bytes_mapped_limit = total_ram / 16;
+ }
- return tc;
+ return tc;
}
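Per-generation context creation is expected to chain the two helpers above: call fd_context_init() first, fill in the generation specific hooks, and wrap with the threaded context last. A hedged sketch, where fdN_context_create and the primtypes table are illustrative rather than taken from this patch:

struct pipe_context *
fdN_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
{
   struct fd_context *ctx = CALLOC_STRUCT(fd_context);
   struct pipe_context *pctx;

   if (!ctx)
      return NULL;

   pctx = fd_context_init(ctx, pscreen, primtypes, priv, flags);
   if (!pctx)
      return NULL;

   /* ... install generation specific hooks (draw_vbo, clear, etc) ... */

   /* wrap with the threaded context last, so it sees the final vtable: */
   return fd_context_init_tc(pctx, flags);
}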
#ifndef FREEDRENO_CONTEXT_H_
#define FREEDRENO_CONTEXT_H_
-#include "pipe/p_context.h"
#include "indices/u_primconvert.h"
-#include "util/u_blitter.h"
+#include "pipe/p_context.h"
#include "util/libsync.h"
#include "util/list.h"
#include "util/slab.h"
+#include "util/u_blitter.h"
#include "util/u_string.h"
#include "util/u_threaded_context.h"
#include "util/u_trace.h"
#include "freedreno_autotune.h"
-#include "freedreno_screen.h"
#include "freedreno_gmem.h"
+#include "freedreno_screen.h"
#include "freedreno_util.h"
#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)
struct fd_batch;
struct fd_texture_stateobj {
- struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
- unsigned num_textures;
- unsigned valid_textures;
- struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
- unsigned num_samplers;
- unsigned valid_samplers;
- /* number of samples per sampler, 2 bits per sampler: */
- uint32_t samples;
+ struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
+ unsigned num_textures;
+ unsigned valid_textures;
+ struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
+ unsigned num_samplers;
+ unsigned valid_samplers;
+ /* number of samples per sampler, 2 bits per sampler: */
+ uint32_t samples;
};
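The samples field above packs one small value per sampler, two bits each. A standalone illustration of that kind of packing (the driver's exact value encoding is not shown here, just the bit bookkeeping):

#include <assert.h>
#include <stdint.h>

static uint32_t
set_slot(uint32_t packed, unsigned slot, uint32_t val)
{
   assert(slot < 16 && val < 4);       /* 16 slots x 2 bits in 32 bits */
   packed &= ~(0x3u << (slot * 2));    /* clear the old value */
   return packed | (val << (slot * 2));
}

static uint32_t
get_slot(uint32_t packed, unsigned slot)
{
   return (packed >> (slot * 2)) & 0x3;
}

int
main(void)
{
   uint32_t samples = 0;

   samples = set_slot(samples, 3, 2);
   assert(get_slot(samples, 3) == 2);
   assert(get_slot(samples, 0) == 0);
   return 0;
}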
struct fd_program_stateobj {
- void *vs, *hs, *ds, *gs, *fs;
+ void *vs, *hs, *ds, *gs, *fs;
};
struct fd_constbuf_stateobj {
- struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
- uint32_t enabled_mask;
+ struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
+ uint32_t enabled_mask;
};
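Several of the state objects here pair a slot array with an enabled_mask so iteration can skip unbound slots. A standalone sketch of the bind/unbind bookkeeping (names are illustrative):

#include <assert.h>
#include <stdint.h>

#define MAX_SLOTS 32

struct stateobj {
   const void *slot[MAX_SLOTS];
   uint32_t enabled_mask;   /* bit N set iff slot[N] is bound */
};

static void
bind_slot(struct stateobj *so, unsigned n, const void *obj)
{
   so->slot[n] = obj;
   if (obj)
      so->enabled_mask |= (1u << n);
   else
      so->enabled_mask &= ~(1u << n);
}

int
main(void)
{
   struct stateobj so = {0};
   int dummy;

   bind_slot(&so, 2, &dummy);
   assert(so.enabled_mask == 0x4);
   bind_slot(&so, 2, NULL);
   assert(so.enabled_mask == 0);
   return 0;
}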
struct fd_shaderbuf_stateobj {
- struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS];
- uint32_t enabled_mask;
- uint32_t writable_mask;
+ struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS];
+ uint32_t enabled_mask;
+ uint32_t writable_mask;
};
struct fd_shaderimg_stateobj {
- struct pipe_image_view si[PIPE_MAX_SHADER_IMAGES];
- uint32_t enabled_mask;
+ struct pipe_image_view si[PIPE_MAX_SHADER_IMAGES];
+ uint32_t enabled_mask;
};
struct fd_vertexbuf_stateobj {
- struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
- unsigned count;
- uint32_t enabled_mask;
+ struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+ unsigned count;
+ uint32_t enabled_mask;
};
struct fd_vertex_stateobj {
- struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
- unsigned num_elements;
+ struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
+ unsigned num_elements;
};
struct fd_stream_output_target {
- struct pipe_stream_output_target base;
- struct pipe_resource *offset_buf;
- /* stride of the last stream out recorded to this target, for glDrawTransformFeedback(). */
- uint32_t stride;
+ struct pipe_stream_output_target base;
+ struct pipe_resource *offset_buf;
+ /* stride of the last stream out recorded to this target, for
+ * glDrawTransformFeedback(). */
+ uint32_t stride;
};
struct fd_streamout_stateobj {
- struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
- /* Bitmask of stream that should be reset. */
- unsigned reset;
-
- unsigned num_targets;
- /* Track offset from vtxcnt for streamout data. This counter
- * is just incremented by # of vertices on each draw until
- * reset or new streamout buffer bound.
- *
- * When we eventually have GS, the CPU won't actually know the
- * number of vertices per draw, so I think we'll have to do
- * something more clever.
- */
- unsigned offsets[PIPE_MAX_SO_BUFFERS];
-
- /* Pre-a6xx, the maximum number of vertices that could be recorded to this
- * set of targets with the current vertex shader. a6xx and newer, hardware
- * queries are used.
- */
- unsigned max_tf_vtx;
-
- /* Pre-a6xx, the number of verts written to the buffers since the last
- * Begin. Used for overflow checking for SW queries.
- */
- unsigned verts_written;
+ struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
+   /* Bitmask of streams that should be reset. */
+ unsigned reset;
+
+ unsigned num_targets;
+ /* Track offset from vtxcnt for streamout data. This counter
+ * is just incremented by # of vertices on each draw until
+ * reset or new streamout buffer bound.
+ *
+ * When we eventually have GS, the CPU won't actually know the
+ * number of vertices per draw, so I think we'll have to do
+ * something more clever.
+ */
+ unsigned offsets[PIPE_MAX_SO_BUFFERS];
+
+ /* Pre-a6xx, the maximum number of vertices that could be recorded to this
+    * set of targets with the current vertex shader.  On a6xx and newer,
+    * hardware queries are used.
+ */
+ unsigned max_tf_vtx;
+
+ /* Pre-a6xx, the number of verts written to the buffers since the last
+ * Begin. Used for overflow checking for SW queries.
+ */
+ unsigned verts_written;
};
#define MAX_GLOBAL_BUFFERS 16
struct fd_global_bindings_stateobj {
- struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
- uint32_t enabled_mask;
+ struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
+ uint32_t enabled_mask;
};
/* group together the vertex and vertexbuf state.. for ease of passing
 * around, and because various internal operations (gmem<->mem, etc)
 * need their own vertex state:
 */
struct fd_vertex_state {
- struct fd_vertex_stateobj *vtx;
- struct fd_vertexbuf_stateobj vertexbuf;
+ struct fd_vertex_stateobj *vtx;
+ struct fd_vertexbuf_stateobj vertexbuf;
};
/* global 3d pipeline dirty state: */
enum fd_dirty_3d_state {
- FD_DIRTY_BLEND = BIT(0),
- FD_DIRTY_RASTERIZER = BIT(1),
- FD_DIRTY_ZSA = BIT(2),
- FD_DIRTY_BLEND_COLOR = BIT(3),
- FD_DIRTY_STENCIL_REF = BIT(4),
- FD_DIRTY_SAMPLE_MASK = BIT(5),
- FD_DIRTY_FRAMEBUFFER = BIT(6),
- FD_DIRTY_STIPPLE = BIT(7),
- FD_DIRTY_VIEWPORT = BIT(8),
- FD_DIRTY_VTXSTATE = BIT(9),
- FD_DIRTY_VTXBUF = BIT(10),
- FD_DIRTY_MIN_SAMPLES = BIT(11),
- FD_DIRTY_SCISSOR = BIT(12),
- FD_DIRTY_STREAMOUT = BIT(13),
- FD_DIRTY_UCP = BIT(14),
- FD_DIRTY_PROG = BIT(15),
- FD_DIRTY_CONST = BIT(16),
- FD_DIRTY_TEX = BIT(17),
- FD_DIRTY_IMAGE = BIT(18),
- FD_DIRTY_SSBO = BIT(19),
-
- /* only used by a2xx.. possibly can be removed.. */
- FD_DIRTY_TEXSTATE = BIT(20),
-
- /* fine grained state changes, for cases where state is not orthogonal
- * from hw perspective:
- */
- FD_DIRTY_RASTERIZER_DISCARD = BIT(24),
- FD_DIRTY_BLEND_DUAL = BIT(25),
+ FD_DIRTY_BLEND = BIT(0),
+ FD_DIRTY_RASTERIZER = BIT(1),
+ FD_DIRTY_ZSA = BIT(2),
+ FD_DIRTY_BLEND_COLOR = BIT(3),
+ FD_DIRTY_STENCIL_REF = BIT(4),
+ FD_DIRTY_SAMPLE_MASK = BIT(5),
+ FD_DIRTY_FRAMEBUFFER = BIT(6),
+ FD_DIRTY_STIPPLE = BIT(7),
+ FD_DIRTY_VIEWPORT = BIT(8),
+ FD_DIRTY_VTXSTATE = BIT(9),
+ FD_DIRTY_VTXBUF = BIT(10),
+ FD_DIRTY_MIN_SAMPLES = BIT(11),
+ FD_DIRTY_SCISSOR = BIT(12),
+ FD_DIRTY_STREAMOUT = BIT(13),
+ FD_DIRTY_UCP = BIT(14),
+ FD_DIRTY_PROG = BIT(15),
+ FD_DIRTY_CONST = BIT(16),
+ FD_DIRTY_TEX = BIT(17),
+ FD_DIRTY_IMAGE = BIT(18),
+ FD_DIRTY_SSBO = BIT(19),
+
+ /* only used by a2xx.. possibly can be removed.. */
+ FD_DIRTY_TEXSTATE = BIT(20),
+
+ /* fine grained state changes, for cases where state is not orthogonal
+ * from hw perspective:
+ */
+ FD_DIRTY_RASTERIZER_DISCARD = BIT(24),
+ FD_DIRTY_BLEND_DUAL = BIT(25),
#define NUM_DIRTY_BITS 26
- /* additional flag for state requires updated resource tracking: */
- FD_DIRTY_RESOURCE = BIT(31),
+ /* additional flag for state requires updated resource tracking: */
+ FD_DIRTY_RESOURCE = BIT(31),
};
/* per shader-stage dirty state: */
enum fd_dirty_shader_state {
- FD_DIRTY_SHADER_PROG = BIT(0),
- FD_DIRTY_SHADER_CONST = BIT(1),
- FD_DIRTY_SHADER_TEX = BIT(2),
- FD_DIRTY_SHADER_SSBO = BIT(3),
- FD_DIRTY_SHADER_IMAGE = BIT(4),
+ FD_DIRTY_SHADER_PROG = BIT(0),
+ FD_DIRTY_SHADER_CONST = BIT(1),
+ FD_DIRTY_SHADER_TEX = BIT(2),
+ FD_DIRTY_SHADER_SSBO = BIT(3),
+ FD_DIRTY_SHADER_IMAGE = BIT(4),
#define NUM_DIRTY_SHADER_BITS 5
};
struct ir3_shader_key;
struct fd_context {
- struct pipe_context base;
-
- struct threaded_context *tc;
-
- struct list_head node; /* node in screen->context_list */
-
- /* We currently need to serialize emitting GMEM batches, because of
- * VSC state access in the context.
- *
- * In practice this lock should not be contended, since pipe_context
- * use should be single threaded. But it is needed to protect the
- * case, with batch reordering where a ctxB batch triggers flushing
- * a ctxA batch
- */
- simple_mtx_t gmem_lock;
-
- struct fd_device *dev;
- struct fd_screen *screen;
- struct fd_pipe *pipe;
-
- struct blitter_context *blitter dt;
- void *clear_rs_state[2] dt;
- struct primconvert_context *primconvert dt;
-
- /* slab for pipe_transfer allocations: */
- struct slab_child_pool transfer_pool dt;
- struct slab_child_pool transfer_pool_unsync; /* for threaded_context */
-
- struct fd_autotune autotune dt;
-
- /**
- * query related state:
- */
- /*@{*/
- /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
- struct slab_mempool sample_pool dt;
- struct slab_mempool sample_period_pool dt;
-
- /* sample-providers for hw queries: */
- const struct fd_hw_sample_provider *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS];
-
- /* list of active queries: */
- struct list_head hw_active_queries dt;
-
- /* sample-providers for accumulating hw queries: */
- const struct fd_acc_sample_provider *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS];
-
- /* list of active accumulating queries: */
- struct list_head acc_active_queries dt;
- /*@}*/
-
- /* Whether we need to recheck the active_queries list next
- * fd_batch_update_queries().
- */
- bool update_active_queries dt;
-
- /* Current state of pctx->set_active_query_state() (i.e. "should drawing
- * be counted against non-perfcounter queries")
- */
- bool active_queries dt;
-
- /* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to
- * DI_PT_x value to use for draw initiator. There are some
- * slight differences between generation:
- */
- const uint8_t *primtypes;
- uint32_t primtype_mask;
-
- /* shaders used by clear, and gmem->mem blits: */
- struct fd_program_stateobj solid_prog; // TODO move to screen?
- struct fd_program_stateobj solid_layered_prog;
-
- /* shaders used by mem->gmem blits: */
- struct fd_program_stateobj blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
- struct fd_program_stateobj blit_z, blit_zs;
-
- /* Stats/counters:
- */
- struct {
- uint64_t prims_emitted;
- uint64_t prims_generated;
- uint64_t draw_calls;
- uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw, batch_restore;
- uint64_t staging_uploads, shadow_uploads;
- uint64_t vs_regs, hs_regs, ds_regs, gs_regs, fs_regs;
- } stats dt;
-
- /* Counter for number of users who need sw counters (so we can
- * skip collecting them when not needed)
- */
- unsigned stats_users;
-
- /* Current batch.. the rule here is that you can deref ctx->batch
- * in codepaths from pipe_context entrypoints. But not in code-
- * paths from fd_batch_flush() (basically, the stuff that gets
- * called from GMEM code), since in those code-paths the batch
- * you care about is not necessarily the same as ctx->batch.
- */
- struct fd_batch *batch dt;
-
- /* NULL if there has been rendering since last flush. Otherwise
- * keeps a reference to the last fence so we can re-use it rather
- * than having to flush no-op batch.
- */
- struct pipe_fence_handle *last_fence dt;
-
- /* Fence fd we are told to wait on via ->fence_server_sync() (or -1
- * if none). The in-fence is transferred over to the batch on the
- * next draw/blit/grid.
- *
- * The reason for this extra complexity is that apps will typically
- * do eglWaitSyncKHR()/etc at the beginning of the frame, before the
- * first draw. But mesa/st doesn't flush down framebuffer state
- * change until we hit a draw, so at ->fence_server_sync() time, we
- * don't yet have the correct batch. If we created a batch at that
- * point, it would be the wrong one, and we'd have to flush it pre-
- * maturely, causing us to stall early in the frame where we could
- * be building up cmdstream.
- */
- int in_fence_fd dt;
-
- /* track last known reset status globally and per-context to
- * determine if more resets occurred since then. If global reset
- * count increases, it means some other context crashed. If
- * per-context reset count increases, it means we crashed the
- * gpu.
- */
- uint32_t context_reset_count dt;
- uint32_t global_reset_count dt;
-
- /* Context sequence #, used for batch-cache key: */
- uint16_t seqno;
-
- /* Cost per draw, used in conjunction with samples-passed history to
- * estimate whether GMEM or bypass is the better option.
- */
- uint8_t draw_cost;
-
- /* Are we in process of shadowing a resource? Used to detect recursion
- * in transfer_map, and skip unneeded synchronization.
- */
- bool in_shadow : 1 dt;
-
- /* Ie. in blit situation where we no longer care about previous framebuffer
- * contents. Main point is to eliminate blits from fd_try_shadow_resource().
- * For example, in case of texture upload + gen-mipmaps.
- */
- bool in_discard_blit : 1 dt;
-
- /* points to either scissor or disabled_scissor depending on rast state: */
- struct pipe_scissor_state *current_scissor dt;
-
- struct pipe_scissor_state scissor dt;
-
- /* we don't have a disable/enable bit for scissor, so instead we keep
- * a disabled-scissor state which matches the entire bound framebuffer
- * and use that when scissor is not enabled.
- */
- struct pipe_scissor_state disabled_scissor dt;
-
- /* Per vsc pipe bo's (a2xx-a5xx): */
- struct fd_bo *vsc_pipe_bo[32] dt;
-
- /* Maps generic gallium oriented fd_dirty_3d_state bits to generation
- * specific bitmask of state "groups".
- */
- uint32_t gen_dirty_map[NUM_DIRTY_BITS];
- uint32_t gen_dirty_shader_map[PIPE_SHADER_TYPES][NUM_DIRTY_SHADER_BITS];
-
- /* Bitmask of all possible gen_dirty bits: */
- uint32_t gen_all_dirty;
-
- /* Generation specific bitmask of dirty state groups: */
- uint32_t gen_dirty;
-
- /* which state objects need to be re-emit'd: */
- enum fd_dirty_3d_state dirty dt;
-
- /* per shader-stage dirty status: */
- enum fd_dirty_shader_state dirty_shader[PIPE_SHADER_TYPES] dt;
-
- void *compute dt;
- struct pipe_blend_state *blend dt;
- struct pipe_rasterizer_state *rasterizer dt;
- struct pipe_depth_stencil_alpha_state *zsa dt;
-
- struct fd_texture_stateobj tex[PIPE_SHADER_TYPES] dt;
-
- struct fd_program_stateobj prog dt;
- uint32_t bound_shader_stages dt;
-
- struct fd_vertex_state vtx dt;
-
- struct pipe_blend_color blend_color dt;
- struct pipe_stencil_ref stencil_ref dt;
- unsigned sample_mask dt;
- unsigned min_samples dt;
- /* local context fb state, for when ctx->batch is null: */
- struct pipe_framebuffer_state framebuffer dt;
- struct pipe_poly_stipple stipple dt;
- struct pipe_viewport_state viewport dt;
- struct pipe_scissor_state viewport_scissor dt;
- struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES] dt;
- struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES] dt;
- struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES] dt;
- struct fd_streamout_stateobj streamout dt;
- struct fd_global_bindings_stateobj global_bindings dt;
- struct pipe_clip_state ucp dt;
-
- struct pipe_query *cond_query dt;
- bool cond_cond dt; /* inverted rendering condition */
- uint cond_mode dt;
-
- /* Private memory is a memory space where each fiber gets its own piece of
- * memory, in addition to registers. It is backed by a buffer which needs
- * to be large enough to hold the contents of every possible wavefront in
- * every core of the GPU. Because it allocates space via the internal
- * wavefront ID which is shared between all currently executing shaders,
- * the same buffer can be reused by all shaders, as long as all shaders
- * sharing the same buffer use the exact same configuration. There are two
- * inputs to the configuration, the amount of per-fiber space and whether
- * to use the newer per-wave or older per-fiber layout. We only ever
- * increase the size, and shaders with a smaller size requirement simply
- * use the larger existing buffer, so that we only need to keep track of
- * one buffer and its size, but we still need to keep track of per-fiber
- * and per-wave buffers separately so that we never use the same buffer
- * for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for
- * per-wave.
- */
- struct {
- struct fd_bo *bo;
- uint32_t per_fiber_size;
- } pvtmem[2] dt;
-
- /* maps per-shader-stage state plus variant key to hw
- * program stateobj:
- */
- struct ir3_cache *shader_cache;
-
- struct pipe_debug_callback debug;
-
- struct u_trace_context trace_context dt;
-
- /* Called on rebind_resource() for any per-gen cleanup required: */
- void (*rebind_resource)(struct fd_context *ctx, struct fd_resource *rsc) dt;
-
- /* GMEM/tile handling fxns: */
- void (*emit_tile_init)(struct fd_batch *batch) dt;
- void (*emit_tile_prep)(struct fd_batch *batch, const struct fd_tile *tile) dt;
- void (*emit_tile_mem2gmem)(struct fd_batch *batch, const struct fd_tile *tile) dt;
- void (*emit_tile_renderprep)(struct fd_batch *batch, const struct fd_tile *tile) dt;
- void (*emit_tile)(struct fd_batch *batch, const struct fd_tile *tile) dt;
- void (*emit_tile_gmem2mem)(struct fd_batch *batch, const struct fd_tile *tile) dt;
- void (*emit_tile_fini)(struct fd_batch *batch) dt; /* optional */
-
- /* optional, for GMEM bypass: */
- void (*emit_sysmem_prep)(struct fd_batch *batch) dt;
- void (*emit_sysmem_fini)(struct fd_batch *batch) dt;
-
- /* draw: */
- bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info,
- const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count *draw,
- unsigned index_offset) dt;
- bool (*clear)(struct fd_context *ctx, unsigned buffers,
- const union pipe_color_union *color, double depth, unsigned stencil) dt;
-
- /* compute: */
- void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info) dt;
-
- /* query: */
- struct fd_query * (*create_query)(struct fd_context *ctx, unsigned query_type, unsigned index);
- void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles) dt;
- void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
- struct fd_ringbuffer *ring) dt;
- void (*query_update_batch)(struct fd_batch *batch, bool disable_all) dt;
-
- /* blitter: */
- bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info) dt;
- void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc) dt;
-
- /* handling for barriers: */
- void (*framebuffer_barrier)(struct fd_context *ctx) dt;
-
- /* logger: */
- void (*record_timestamp)(struct fd_ringbuffer *ring, struct fd_bo *bo, unsigned offset);
- uint64_t (*ts_to_ns)(uint64_t ts);
-
- /*
- * Common pre-cooked VBO state (used for a3xx and later):
- */
-
- /* for clear/gmem->mem vertices, and mem->gmem */
- struct pipe_resource *solid_vbuf;
-
- /* for mem->gmem tex coords: */
- struct pipe_resource *blit_texcoord_vbuf;
-
- /* vertex state for solid_vbuf:
- * - solid_vbuf / 12 / R32G32B32_FLOAT
- */
- struct fd_vertex_state solid_vbuf_state;
-
- /* vertex state for blit_prog:
- * - blit_texcoord_vbuf / 8 / R32G32_FLOAT
- * - solid_vbuf / 12 / R32G32B32_FLOAT
- */
- struct fd_vertex_state blit_vbuf_state;
-
- /*
- * Info about state of previous draw, for state that comes from
- * pipe_draw_info (ie. not part of a CSO). This allows us to
- * skip some register emit when the state doesn't change from
- * draw-to-draw
- */
- struct {
- bool dirty; /* last draw state unknown */
- bool primitive_restart;
- uint32_t index_start;
- uint32_t instance_start;
- uint32_t restart_index;
- uint32_t streamout_mask;
-
- /* some state changes require a different shader variant. Keep
- * track of this so we know when we need to re-emit shader state
- * due to variant change. See ir3_fixup_shader_state()
- *
- * (used for a3xx+, NULL otherwise)
- */
- struct ir3_shader_key *key;
-
- } last dt;
+ struct pipe_context base;
+
+ struct threaded_context *tc;
+
+ struct list_head node; /* node in screen->context_list */
+
+ /* We currently need to serialize emitting GMEM batches, because of
+ * VSC state access in the context.
+ *
+ * In practice this lock should not be contended, since pipe_context
+ * use should be single threaded. But it is needed to protect the
+    * case, with batch reordering, where a ctxB batch triggers flushing
+    * a ctxA batch.
+ */
+ simple_mtx_t gmem_lock;
+
+ struct fd_device *dev;
+ struct fd_screen *screen;
+ struct fd_pipe *pipe;
+
+ struct blitter_context *blitter dt;
+ void *clear_rs_state[2] dt;
+ struct primconvert_context *primconvert dt;
+
+ /* slab for pipe_transfer allocations: */
+ struct slab_child_pool transfer_pool dt;
+ struct slab_child_pool transfer_pool_unsync; /* for threaded_context */
+
+ struct fd_autotune autotune dt;
+
+ /**
+ * query related state:
+ */
+ /*@{*/
+ /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
+ struct slab_mempool sample_pool dt;
+ struct slab_mempool sample_period_pool dt;
+
+ /* sample-providers for hw queries: */
+ const struct fd_hw_sample_provider
+ *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS];
+
+ /* list of active queries: */
+ struct list_head hw_active_queries dt;
+
+ /* sample-providers for accumulating hw queries: */
+ const struct fd_acc_sample_provider
+ *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS];
+
+ /* list of active accumulating queries: */
+ struct list_head acc_active_queries dt;
+ /*@}*/
+
+ /* Whether we need to recheck the active_queries list next
+ * fd_batch_update_queries().
+ */
+ bool update_active_queries dt;
+
+ /* Current state of pctx->set_active_query_state() (i.e. "should drawing
+ * be counted against non-perfcounter queries")
+ */
+ bool active_queries dt;
+
+ /* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to
+ * DI_PT_x value to use for draw initiator. There are some
+    * slight differences between generations:
+ */
+ const uint8_t *primtypes;
+ uint32_t primtype_mask;
+
+ /* shaders used by clear, and gmem->mem blits: */
+ struct fd_program_stateobj solid_prog; // TODO move to screen?
+ struct fd_program_stateobj solid_layered_prog;
+
+ /* shaders used by mem->gmem blits: */
+ struct fd_program_stateobj
+ blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
+ struct fd_program_stateobj blit_z, blit_zs;
+
+ /* Stats/counters:
+ */
+ struct {
+ uint64_t prims_emitted;
+ uint64_t prims_generated;
+ uint64_t draw_calls;
+ uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw,
+ batch_restore;
+ uint64_t staging_uploads, shadow_uploads;
+ uint64_t vs_regs, hs_regs, ds_regs, gs_regs, fs_regs;
+ } stats dt;
+
+ /* Counter for number of users who need sw counters (so we can
+ * skip collecting them when not needed)
+ */
+ unsigned stats_users;
+
+ /* Current batch.. the rule here is that you can deref ctx->batch
+ * in codepaths from pipe_context entrypoints. But not in code-
+ * paths from fd_batch_flush() (basically, the stuff that gets
+ * called from GMEM code), since in those code-paths the batch
+ * you care about is not necessarily the same as ctx->batch.
+ */
+ struct fd_batch *batch dt;
+
+ /* NULL if there has been rendering since last flush. Otherwise
+ * keeps a reference to the last fence so we can re-use it rather
+    * than having to flush a no-op batch.
+ */
+ struct pipe_fence_handle *last_fence dt;
+
+ /* Fence fd we are told to wait on via ->fence_server_sync() (or -1
+ * if none). The in-fence is transferred over to the batch on the
+ * next draw/blit/grid.
+ *
+ * The reason for this extra complexity is that apps will typically
+ * do eglWaitSyncKHR()/etc at the beginning of the frame, before the
+ * first draw. But mesa/st doesn't flush down framebuffer state
+ * change until we hit a draw, so at ->fence_server_sync() time, we
+ * don't yet have the correct batch. If we created a batch at that
+ * point, it would be the wrong one, and we'd have to flush it pre-
+ * maturely, causing us to stall early in the frame where we could
+ * be building up cmdstream.
+ */
+ int in_fence_fd dt;
+
+ /* track last known reset status globally and per-context to
+ * determine if more resets occurred since then. If global reset
+ * count increases, it means some other context crashed. If
+ * per-context reset count increases, it means we crashed the
+ * gpu.
+ */
+ uint32_t context_reset_count dt;
+ uint32_t global_reset_count dt;
+
+ /* Context sequence #, used for batch-cache key: */
+ uint16_t seqno;
+
+ /* Cost per draw, used in conjunction with samples-passed history to
+ * estimate whether GMEM or bypass is the better option.
+ */
+ uint8_t draw_cost;
+
+   /* Are we in the process of shadowing a resource? Used to detect recursion
+ * in transfer_map, and skip unneeded synchronization.
+ */
+ bool in_shadow : 1 dt;
+
+ /* Ie. in blit situation where we no longer care about previous framebuffer
+ * contents. Main point is to eliminate blits from fd_try_shadow_resource().
+ * For example, in case of texture upload + gen-mipmaps.
+ */
+ bool in_discard_blit : 1 dt;
+
+ /* points to either scissor or disabled_scissor depending on rast state: */
+ struct pipe_scissor_state *current_scissor dt;
+
+ struct pipe_scissor_state scissor dt;
+
+ /* we don't have a disable/enable bit for scissor, so instead we keep
+ * a disabled-scissor state which matches the entire bound framebuffer
+ * and use that when scissor is not enabled.
+ */
+ struct pipe_scissor_state disabled_scissor dt;
+
+ /* Per vsc pipe bo's (a2xx-a5xx): */
+ struct fd_bo *vsc_pipe_bo[32] dt;
+
+ /* Maps generic gallium oriented fd_dirty_3d_state bits to generation
+ * specific bitmask of state "groups".
+ */
+ uint32_t gen_dirty_map[NUM_DIRTY_BITS];
+ uint32_t gen_dirty_shader_map[PIPE_SHADER_TYPES][NUM_DIRTY_SHADER_BITS];
+
+ /* Bitmask of all possible gen_dirty bits: */
+ uint32_t gen_all_dirty;
+
+ /* Generation specific bitmask of dirty state groups: */
+ uint32_t gen_dirty;
+
+ /* which state objects need to be re-emit'd: */
+ enum fd_dirty_3d_state dirty dt;
+
+ /* per shader-stage dirty status: */
+ enum fd_dirty_shader_state dirty_shader[PIPE_SHADER_TYPES] dt;
+
+ void *compute dt;
+ struct pipe_blend_state *blend dt;
+ struct pipe_rasterizer_state *rasterizer dt;
+ struct pipe_depth_stencil_alpha_state *zsa dt;
+
+ struct fd_texture_stateobj tex[PIPE_SHADER_TYPES] dt;
+
+ struct fd_program_stateobj prog dt;
+ uint32_t bound_shader_stages dt;
+
+ struct fd_vertex_state vtx dt;
+
+ struct pipe_blend_color blend_color dt;
+ struct pipe_stencil_ref stencil_ref dt;
+ unsigned sample_mask dt;
+ unsigned min_samples dt;
+ /* local context fb state, for when ctx->batch is null: */
+ struct pipe_framebuffer_state framebuffer dt;
+ struct pipe_poly_stipple stipple dt;
+ struct pipe_viewport_state viewport dt;
+ struct pipe_scissor_state viewport_scissor dt;
+ struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES] dt;
+ struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES] dt;
+ struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES] dt;
+ struct fd_streamout_stateobj streamout dt;
+ struct fd_global_bindings_stateobj global_bindings dt;
+ struct pipe_clip_state ucp dt;
+
+ struct pipe_query *cond_query dt;
+ bool cond_cond dt; /* inverted rendering condition */
+ uint cond_mode dt;
+
+ /* Private memory is a memory space where each fiber gets its own piece of
+ * memory, in addition to registers. It is backed by a buffer which needs
+ * to be large enough to hold the contents of every possible wavefront in
+ * every core of the GPU. Because it allocates space via the internal
+ * wavefront ID which is shared between all currently executing shaders,
+ * the same buffer can be reused by all shaders, as long as all shaders
+ * sharing the same buffer use the exact same configuration. There are two
+ * inputs to the configuration, the amount of per-fiber space and whether
+ * to use the newer per-wave or older per-fiber layout. We only ever
+ * increase the size, and shaders with a smaller size requirement simply
+ * use the larger existing buffer, so that we only need to keep track of
+ * one buffer and its size, but we still need to keep track of per-fiber
+ * and per-wave buffers separately so that we never use the same buffer
+ * for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for
+ * per-wave.
+ */
+ struct {
+ struct fd_bo *bo;
+ uint32_t per_fiber_size;
+ } pvtmem[2] dt;
+
+ /* maps per-shader-stage state plus variant key to hw
+ * program stateobj:
+ */
+ struct ir3_cache *shader_cache;
+
+ struct pipe_debug_callback debug;
+
+ struct u_trace_context trace_context dt;
+
+ /* Called on rebind_resource() for any per-gen cleanup required: */
+ void (*rebind_resource)(struct fd_context *ctx, struct fd_resource *rsc) dt;
+
+ /* GMEM/tile handling fxns: */
+ void (*emit_tile_init)(struct fd_batch *batch) dt;
+ void (*emit_tile_prep)(struct fd_batch *batch,
+ const struct fd_tile *tile) dt;
+ void (*emit_tile_mem2gmem)(struct fd_batch *batch,
+ const struct fd_tile *tile) dt;
+ void (*emit_tile_renderprep)(struct fd_batch *batch,
+ const struct fd_tile *tile) dt;
+ void (*emit_tile)(struct fd_batch *batch, const struct fd_tile *tile) dt;
+ void (*emit_tile_gmem2mem)(struct fd_batch *batch,
+ const struct fd_tile *tile) dt;
+ void (*emit_tile_fini)(struct fd_batch *batch) dt; /* optional */
+
+ /* optional, for GMEM bypass: */
+ void (*emit_sysmem_prep)(struct fd_batch *batch) dt;
+ void (*emit_sysmem_fini)(struct fd_batch *batch) dt;
+
+ /* draw: */
+ bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info,
+ const struct pipe_draw_indirect_info *indirect,
+ const struct pipe_draw_start_count *draw,
+ unsigned index_offset) dt;
+ bool (*clear)(struct fd_context *ctx, unsigned buffers,
+ const union pipe_color_union *color, double depth,
+ unsigned stencil) dt;
+
+ /* compute: */
+ void (*launch_grid)(struct fd_context *ctx,
+ const struct pipe_grid_info *info) dt;
+
+ /* query: */
+ struct fd_query *(*create_query)(struct fd_context *ctx, unsigned query_type,
+ unsigned index);
+ void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles) dt;
+ void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
+ struct fd_ringbuffer *ring) dt;
+ void (*query_update_batch)(struct fd_batch *batch, bool disable_all) dt;
+
+ /* blitter: */
+ bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info) dt;
+ void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc) dt;
+
+ /* handling for barriers: */
+ void (*framebuffer_barrier)(struct fd_context *ctx) dt;
+
+ /* logger: */
+ void (*record_timestamp)(struct fd_ringbuffer *ring, struct fd_bo *bo,
+ unsigned offset);
+ uint64_t (*ts_to_ns)(uint64_t ts);
+
+ /*
+ * Common pre-cooked VBO state (used for a3xx and later):
+ */
+
+ /* for clear/gmem->mem vertices, and mem->gmem */
+ struct pipe_resource *solid_vbuf;
+
+ /* for mem->gmem tex coords: */
+ struct pipe_resource *blit_texcoord_vbuf;
+
+ /* vertex state for solid_vbuf:
+ * - solid_vbuf / 12 / R32G32B32_FLOAT
+ */
+ struct fd_vertex_state solid_vbuf_state;
+
+ /* vertex state for blit_prog:
+ * - blit_texcoord_vbuf / 8 / R32G32_FLOAT
+ * - solid_vbuf / 12 / R32G32B32_FLOAT
+ */
+ struct fd_vertex_state blit_vbuf_state;
+
+ /*
+ * Info about state of previous draw, for state that comes from
+ * pipe_draw_info (ie. not part of a CSO). This allows us to
+ * skip some register emit when the state doesn't change from
+ * draw-to-draw
+ */
+ struct {
+ bool dirty; /* last draw state unknown */
+ bool primitive_restart;
+ uint32_t index_start;
+ uint32_t instance_start;
+ uint32_t restart_index;
+ uint32_t streamout_mask;
+
+ /* some state changes require a different shader variant. Keep
+ * track of this so we know when we need to re-emit shader state
+ * due to variant change. See ir3_fixup_shader_state()
+ *
+ * (used for a3xx+, NULL otherwise)
+ */
+ struct ir3_shader_key *key;
+
+ } last dt;
};
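The pvtmem comment above describes a grow-only buffer shared per layout. A standalone sketch of that policy, with the actual BO allocation elided (names are illustrative):

#include <assert.h>
#include <stdint.h>

struct pvtmem {
   uint32_t size;   /* current buffer size; 0 = not yet allocated */
};

/* grow-only: a request smaller than the current buffer reuses it, a
 * larger one would trigger reallocation (the realloc itself is elided):
 */
static uint32_t
request(struct pvtmem *p, uint32_t per_fiber_size)
{
   if (per_fiber_size > p->size)
      p->size = per_fiber_size;
   return p->size;
}

int
main(void)
{
   struct pvtmem layouts[2] = {0};   /* [0]=per-fiber, [1]=per-wave */

   assert(request(&layouts[0], 128) == 128);
   assert(request(&layouts[0], 64) == 128);   /* smaller request reuses */
   assert(request(&layouts[0], 256) == 256);  /* grow */
   assert(layouts[1].size == 0);   /* layouts are tracked separately */
   return 0;
}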
static inline struct fd_context *
fd_context(struct pipe_context *pctx)
{
- return (struct fd_context *)pctx;
+ return (struct fd_context *)pctx;
}
static inline struct fd_stream_output_target *
fd_stream_output_target(struct pipe_stream_output_target *target)
{
- return (struct fd_stream_output_target *)target;
+ return (struct fd_stream_output_target *)target;
}
/**
static inline bool
fd_context_dirty_resource(enum fd_dirty_3d_state dirty)
{
- return dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA |
- FD_DIRTY_BLEND | FD_DIRTY_SSBO | FD_DIRTY_IMAGE |
- FD_DIRTY_VTXBUF | FD_DIRTY_TEX | FD_DIRTY_STREAMOUT);
+ return dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA | FD_DIRTY_BLEND |
+ FD_DIRTY_SSBO | FD_DIRTY_IMAGE | FD_DIRTY_VTXBUF |
+ FD_DIRTY_TEX | FD_DIRTY_STREAMOUT);
}
/* Mark specified non-shader-stage related state as dirty: */
static inline void
-fd_context_dirty(struct fd_context *ctx, enum fd_dirty_3d_state dirty)
- assert_dt
+fd_context_dirty(struct fd_context *ctx, enum fd_dirty_3d_state dirty) assert_dt
{
- assert(util_is_power_of_two_nonzero(dirty));
- STATIC_ASSERT(ffs(dirty) <= ARRAY_SIZE(ctx->gen_dirty_map));
+ assert(util_is_power_of_two_nonzero(dirty));
+ STATIC_ASSERT(ffs(dirty) <= ARRAY_SIZE(ctx->gen_dirty_map));
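+ /* translate the core dirty bit into the per-generation dirty bits
+  * registered for it via fd_context_add_map():
+  */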
- ctx->gen_dirty |= ctx->gen_dirty_map[ffs(dirty) - 1];
+ ctx->gen_dirty |= ctx->gen_dirty_map[ffs(dirty) - 1];
- if (fd_context_dirty_resource(dirty))
- dirty |= FD_DIRTY_RESOURCE;
+ if (fd_context_dirty_resource(dirty))
+ dirty |= FD_DIRTY_RESOURCE;
- ctx->dirty |= dirty;
+ ctx->dirty |= dirty;
}
static inline void
fd_context_dirty_shader(struct fd_context *ctx, enum pipe_shader_type shader,
- enum fd_dirty_shader_state dirty)
- assert_dt
+ enum fd_dirty_shader_state dirty) assert_dt
{
- const enum fd_dirty_3d_state map[] = {
- FD_DIRTY_PROG,
- FD_DIRTY_CONST,
- FD_DIRTY_TEX,
- FD_DIRTY_SSBO,
- FD_DIRTY_IMAGE,
- };
-
- /* Need to update the table above if these shift: */
- STATIC_ASSERT(FD_DIRTY_SHADER_PROG == BIT(0));
- STATIC_ASSERT(FD_DIRTY_SHADER_CONST == BIT(1));
- STATIC_ASSERT(FD_DIRTY_SHADER_TEX == BIT(2));
- STATIC_ASSERT(FD_DIRTY_SHADER_SSBO == BIT(3));
- STATIC_ASSERT(FD_DIRTY_SHADER_IMAGE == BIT(4));
-
- assert(util_is_power_of_two_nonzero(dirty));
- assert(ffs(dirty) <= ARRAY_SIZE(map));
-
- ctx->gen_dirty |= ctx->gen_dirty_shader_map[shader][ffs(dirty) - 1];
-
- ctx->dirty_shader[shader] |= dirty;
- fd_context_dirty(ctx, map[ffs(dirty) - 1]);
+ const enum fd_dirty_3d_state map[] = {
+ FD_DIRTY_PROG, FD_DIRTY_CONST, FD_DIRTY_TEX,
+ FD_DIRTY_SSBO, FD_DIRTY_IMAGE,
+ };
+
+ /* Need to update the table above if these shift: */
+ STATIC_ASSERT(FD_DIRTY_SHADER_PROG == BIT(0));
+ STATIC_ASSERT(FD_DIRTY_SHADER_CONST == BIT(1));
+ STATIC_ASSERT(FD_DIRTY_SHADER_TEX == BIT(2));
+ STATIC_ASSERT(FD_DIRTY_SHADER_SSBO == BIT(3));
+ STATIC_ASSERT(FD_DIRTY_SHADER_IMAGE == BIT(4));
+
+ assert(util_is_power_of_two_nonzero(dirty));
+ assert(ffs(dirty) <= ARRAY_SIZE(map));
+
+ ctx->gen_dirty |= ctx->gen_dirty_shader_map[shader][ffs(dirty) - 1];
+
+ ctx->dirty_shader[shader] |= dirty;
+ fd_context_dirty(ctx, map[ffs(dirty) - 1]);
}
/* mark all state dirty: */
static inline void
-fd_context_all_dirty(struct fd_context *ctx)
- assert_dt
+fd_context_all_dirty(struct fd_context *ctx) assert_dt
{
- ctx->last.dirty = true;
- ctx->dirty = ~0;
+ ctx->last.dirty = true;
+ ctx->dirty = ~0;
- /* NOTE: don't use ~0 for gen_dirty, because the gen specific
- * emit code will loop over all the bits:
- */
- ctx->gen_dirty = ctx->gen_all_dirty;
+ /* NOTE: don't use ~0 for gen_dirty, because the gen specific
+ * emit code will loop over all the bits:
+ */
+ ctx->gen_dirty = ctx->gen_all_dirty;
- for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
- ctx->dirty_shader[i] = ~0;
+ for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
+ ctx->dirty_shader[i] = ~0;
}
static inline void
-fd_context_all_clean(struct fd_context *ctx)
- assert_dt
+fd_context_all_clean(struct fd_context *ctx) assert_dt
{
- ctx->last.dirty = false;
- ctx->dirty = 0;
- ctx->gen_dirty = 0;
- for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
- /* don't mark compute state as clean, since it is not emitted
- * during normal draw call. The places that call _all_dirty(),
- * it is safe to mark compute state dirty as well, but the
- * inverse is not true.
- */
- if (i == PIPE_SHADER_COMPUTE)
- continue;
- ctx->dirty_shader[i] = 0;
- }
+ ctx->last.dirty = false;
+ ctx->dirty = 0;
+ ctx->gen_dirty = 0;
+ for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
+ /* don't mark compute state as clean, since it is not emitted
+ * during a normal draw call. In the places that call _all_dirty(),
+ * it is safe to mark compute state dirty as well, but the
+ * inverse is not true.
+ */
+ if (i == PIPE_SHADER_COMPUTE)
+ continue;
+ ctx->dirty_shader[i] = 0;
+ }
}
/**
*/
static inline void
fd_context_add_map(struct fd_context *ctx, enum fd_dirty_3d_state dirty,
- uint32_t gen_dirty)
+ uint32_t gen_dirty)
{
- u_foreach_bit (b, dirty) {
- ctx->gen_dirty_map[b] |= gen_dirty;
- }
- ctx->gen_all_dirty |= gen_dirty;
+ u_foreach_bit (b, dirty) {
+ ctx->gen_dirty_map[b] |= gen_dirty;
+ }
+ ctx->gen_all_dirty |= gen_dirty;
}
/**
*/
static inline void
fd_context_add_shader_map(struct fd_context *ctx, enum pipe_shader_type shader,
- enum fd_dirty_shader_state dirty, uint32_t gen_dirty)
+ enum fd_dirty_shader_state dirty, uint32_t gen_dirty)
{
- u_foreach_bit (b, dirty) {
- ctx->gen_dirty_shader_map[shader][b] |= gen_dirty;
- }
- ctx->gen_all_dirty |= gen_dirty;
+ u_foreach_bit (b, dirty) {
+ ctx->gen_dirty_shader_map[shader][b] |= gen_dirty;
+ }
+ ctx->gen_all_dirty |= gen_dirty;
}
static inline struct pipe_scissor_state *
-fd_context_get_scissor(struct fd_context *ctx)
- assert_dt
+fd_context_get_scissor(struct fd_context *ctx) assert_dt
{
- return ctx->current_scissor;
+ return ctx->current_scissor;
}
static inline bool
fd_supported_prim(struct fd_context *ctx, unsigned prim)
{
- return (1 << prim) & ctx->primtype_mask;
+ return (1 << prim) & ctx->primtype_mask;
}
void fd_context_switch_from(struct fd_context *ctx) assert_dt;
-void fd_context_switch_to(struct fd_context *ctx, struct fd_batch *batch) assert_dt;
-struct fd_batch * fd_context_batch(struct fd_context *ctx) assert_dt;
-struct fd_batch * fd_context_batch_locked(struct fd_context *ctx) assert_dt;
+void fd_context_switch_to(struct fd_context *ctx,
+ struct fd_batch *batch) assert_dt;
+struct fd_batch *fd_context_batch(struct fd_context *ctx) assert_dt;
+struct fd_batch *fd_context_batch_locked(struct fd_context *ctx) assert_dt;
void fd_context_setup_common_vbos(struct fd_context *ctx);
void fd_context_cleanup_common_vbos(struct fd_context *ctx);
void fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len);
void fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len);
-struct pipe_context * fd_context_init(struct fd_context *ctx,
- struct pipe_screen *pscreen, const uint8_t *primtypes,
- void *priv, unsigned flags);
-struct pipe_context * fd_context_init_tc(struct pipe_context *pctx, unsigned flags);
+struct pipe_context *fd_context_init(struct fd_context *ctx,
+ struct pipe_screen *pscreen,
+ const uint8_t *primtypes, void *priv,
+ unsigned flags);
+struct pipe_context *fd_context_init_tc(struct pipe_context *pctx,
+ unsigned flags);
void fd_context_destroy(struct pipe_context *pctx) assert_dt;
*/
#include "pipe/p_state.h"
+#include "util/format/u_format.h"
#include "util/u_draw.h"
-#include "util/u_string.h"
+#include "util/u_helpers.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
-#include "util/format/u_format.h"
-#include "util/u_helpers.h"
+#include "util/u_string.h"
#include "freedreno_blitter.h"
-#include "freedreno_draw.h"
#include "freedreno_context.h"
+#include "freedreno_draw.h"
#include "freedreno_fence.h"
-#include "freedreno_state.h"
-#include "freedreno_resource.h"
#include "freedreno_query_acc.h"
#include "freedreno_query_hw.h"
+#include "freedreno_resource.h"
+#include "freedreno_state.h"
#include "freedreno_util.h"
static void
-resource_read(struct fd_batch *batch, struct pipe_resource *prsc)
- assert_dt
+resource_read(struct fd_batch *batch, struct pipe_resource *prsc) assert_dt
{
- if (!prsc)
- return;
- fd_batch_resource_read(batch, fd_resource(prsc));
+ if (!prsc)
+ return;
+ fd_batch_resource_read(batch, fd_resource(prsc));
}
static void
-resource_written(struct fd_batch *batch, struct pipe_resource *prsc)
- assert_dt
+resource_written(struct fd_batch *batch, struct pipe_resource *prsc) assert_dt
{
- if (!prsc)
- return;
- fd_batch_resource_write(batch, fd_resource(prsc));
+ if (!prsc)
+ return;
+ fd_batch_resource_write(batch, fd_resource(prsc));
}
static void
-batch_draw_tracking_for_dirty_bits(struct fd_batch *batch)
- assert_dt
+batch_draw_tracking_for_dirty_bits(struct fd_batch *batch) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- unsigned buffers = 0, restore_buffers = 0;
-
- if (ctx->dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA)) {
- if (fd_depth_enabled(ctx)) {
- if (fd_resource(pfb->zsbuf->texture)->valid) {
- restore_buffers |= FD_BUFFER_DEPTH;
- } else {
- batch->invalidated |= FD_BUFFER_DEPTH;
- }
- batch->gmem_reason |= FD_GMEM_DEPTH_ENABLED;
- if (fd_depth_write_enabled(ctx)) {
- buffers |= FD_BUFFER_DEPTH;
- resource_written(batch, pfb->zsbuf->texture);
- } else {
- resource_read(batch, pfb->zsbuf->texture);
- }
- }
-
- if (fd_stencil_enabled(ctx)) {
- if (fd_resource(pfb->zsbuf->texture)->valid) {
- restore_buffers |= FD_BUFFER_STENCIL;
- } else {
- batch->invalidated |= FD_BUFFER_STENCIL;
- }
- batch->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
- buffers |= FD_BUFFER_STENCIL;
- resource_written(batch, pfb->zsbuf->texture);
- }
- }
-
- if (ctx->dirty & FD_DIRTY_FRAMEBUFFER) {
- for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
- struct pipe_resource *surf;
-
- if (!pfb->cbufs[i])
- continue;
-
- surf = pfb->cbufs[i]->texture;
-
- if (fd_resource(surf)->valid) {
- restore_buffers |= PIPE_CLEAR_COLOR0 << i;
- } else {
- batch->invalidated |= PIPE_CLEAR_COLOR0 << i;
- }
-
- buffers |= PIPE_CLEAR_COLOR0 << i;
-
- if (ctx->dirty & FD_DIRTY_FRAMEBUFFER)
- resource_written(batch, pfb->cbufs[i]->texture);
- }
- }
-
- if (ctx->dirty & FD_DIRTY_BLEND) {
- if (ctx->blend->logicop_enable)
- batch->gmem_reason |= FD_GMEM_LOGICOP_ENABLED;
- for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
- if (ctx->blend->rt[i].blend_enable)
- batch->gmem_reason |= FD_GMEM_BLEND_ENABLED;
- }
- }
-
- /* Mark SSBOs */
- if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO) {
- const struct fd_shaderbuf_stateobj *so = &ctx->shaderbuf[PIPE_SHADER_FRAGMENT];
-
- u_foreach_bit (i, so->enabled_mask & so->writable_mask)
- resource_written(batch, so->sb[i].buffer);
-
- u_foreach_bit (i, so->enabled_mask & ~so->writable_mask)
- resource_read(batch, so->sb[i].buffer);
- }
-
- if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE) {
- u_foreach_bit (i, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask) {
- struct pipe_image_view *img =
- &ctx->shaderimg[PIPE_SHADER_FRAGMENT].si[i];
- if (img->access & PIPE_IMAGE_ACCESS_WRITE)
- resource_written(batch, img->resource);
- else
- resource_read(batch, img->resource);
- }
- }
-
- u_foreach_bit (s, ctx->bound_shader_stages) {
- /* Mark constbuf as being read: */
- if (ctx->dirty_shader[s] & FD_DIRTY_SHADER_CONST) {
- u_foreach_bit (i, ctx->constbuf[s].enabled_mask)
- resource_read(batch, ctx->constbuf[s].cb[i].buffer);
- }
-
- /* Mark textures as being read */
- if (ctx->dirty_shader[s] & FD_DIRTY_SHADER_TEX) {
- u_foreach_bit (i, ctx->tex[s].valid_textures)
- resource_read(batch, ctx->tex[s].textures[i]->texture);
- }
- }
-
- /* Mark VBOs as being read */
- if (ctx->dirty & FD_DIRTY_VTXBUF) {
- u_foreach_bit (i, ctx->vtx.vertexbuf.enabled_mask) {
- assert(!ctx->vtx.vertexbuf.vb[i].is_user_buffer);
- resource_read(batch, ctx->vtx.vertexbuf.vb[i].buffer.resource);
- }
- }
-
- /* Mark streamout buffers as being written.. */
- if (ctx->dirty & FD_DIRTY_STREAMOUT) {
- for (unsigned i = 0; i < ctx->streamout.num_targets; i++)
- if (ctx->streamout.targets[i])
- resource_written(batch, ctx->streamout.targets[i]->buffer);
- }
-
- /* any buffers that haven't been cleared yet, we need to restore: */
- batch->restore |= restore_buffers & (FD_BUFFER_ALL & ~batch->invalidated);
- /* and any buffers used, need to be resolved: */
- batch->resolve |= buffers;
+ struct fd_context *ctx = batch->ctx;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ unsigned buffers = 0, restore_buffers = 0;
+
+ if (ctx->dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA)) {
+ if (fd_depth_enabled(ctx)) {
+ if (fd_resource(pfb->zsbuf->texture)->valid) {
+ restore_buffers |= FD_BUFFER_DEPTH;
+ } else {
+ batch->invalidated |= FD_BUFFER_DEPTH;
+ }
+ batch->gmem_reason |= FD_GMEM_DEPTH_ENABLED;
+ if (fd_depth_write_enabled(ctx)) {
+ buffers |= FD_BUFFER_DEPTH;
+ resource_written(batch, pfb->zsbuf->texture);
+ } else {
+ resource_read(batch, pfb->zsbuf->texture);
+ }
+ }
+
+ if (fd_stencil_enabled(ctx)) {
+ if (fd_resource(pfb->zsbuf->texture)->valid) {
+ restore_buffers |= FD_BUFFER_STENCIL;
+ } else {
+ batch->invalidated |= FD_BUFFER_STENCIL;
+ }
+ batch->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
+ buffers |= FD_BUFFER_STENCIL;
+ resource_written(batch, pfb->zsbuf->texture);
+ }
+ }
+
+ if (ctx->dirty & FD_DIRTY_FRAMEBUFFER) {
+ for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
+ struct pipe_resource *surf;
+
+ if (!pfb->cbufs[i])
+ continue;
+
+ surf = pfb->cbufs[i]->texture;
+
+ if (fd_resource(surf)->valid) {
+ restore_buffers |= PIPE_CLEAR_COLOR0 << i;
+ } else {
+ batch->invalidated |= PIPE_CLEAR_COLOR0 << i;
+ }
+
+ buffers |= PIPE_CLEAR_COLOR0 << i;
+
+ if (ctx->dirty & FD_DIRTY_FRAMEBUFFER)
+ resource_written(batch, pfb->cbufs[i]->texture);
+ }
+ }
+
+ if (ctx->dirty & FD_DIRTY_BLEND) {
+ if (ctx->blend->logicop_enable)
+ batch->gmem_reason |= FD_GMEM_LOGICOP_ENABLED;
+ for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
+ if (ctx->blend->rt[i].blend_enable)
+ batch->gmem_reason |= FD_GMEM_BLEND_ENABLED;
+ }
+ }
+
+ /* Mark SSBOs */
+ if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO) {
+ const struct fd_shaderbuf_stateobj *so =
+ &ctx->shaderbuf[PIPE_SHADER_FRAGMENT];
+
+ u_foreach_bit (i, so->enabled_mask & so->writable_mask)
+ resource_written(batch, so->sb[i].buffer);
+
+ u_foreach_bit (i, so->enabled_mask & ~so->writable_mask)
+ resource_read(batch, so->sb[i].buffer);
+ }
+
+ if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE) {
+ u_foreach_bit (i, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask) {
+ struct pipe_image_view *img =
+ &ctx->shaderimg[PIPE_SHADER_FRAGMENT].si[i];
+ if (img->access & PIPE_IMAGE_ACCESS_WRITE)
+ resource_written(batch, img->resource);
+ else
+ resource_read(batch, img->resource);
+ }
+ }
+
+ u_foreach_bit (s, ctx->bound_shader_stages) {
+ /* Mark constbuf as being read: */
+ if (ctx->dirty_shader[s] & FD_DIRTY_SHADER_CONST) {
+ u_foreach_bit (i, ctx->constbuf[s].enabled_mask)
+ resource_read(batch, ctx->constbuf[s].cb[i].buffer);
+ }
+
+ /* Mark textures as being read */
+ if (ctx->dirty_shader[s] & FD_DIRTY_SHADER_TEX) {
+ u_foreach_bit (i, ctx->tex[s].valid_textures)
+ resource_read(batch, ctx->tex[s].textures[i]->texture);
+ }
+ }
+
+ /* Mark VBOs as being read */
+ if (ctx->dirty & FD_DIRTY_VTXBUF) {
+ u_foreach_bit (i, ctx->vtx.vertexbuf.enabled_mask) {
+ assert(!ctx->vtx.vertexbuf.vb[i].is_user_buffer);
+ resource_read(batch, ctx->vtx.vertexbuf.vb[i].buffer.resource);
+ }
+ }
+
+ /* Mark streamout buffers as being written: */
+ if (ctx->dirty & FD_DIRTY_STREAMOUT) {
+ for (unsigned i = 0; i < ctx->streamout.num_targets; i++)
+ if (ctx->streamout.targets[i])
+ resource_written(batch, ctx->streamout.targets[i]->buffer);
+ }
+
+ /* any buffers that haven't been cleared yet, we need to restore: */
+ batch->restore |= restore_buffers & (FD_BUFFER_ALL & ~batch->invalidated);
+ /* and any buffers used, need to be resolved: */
+ batch->resolve |= buffers;
}
static void
batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info,
- const struct pipe_draw_indirect_info *indirect)
- assert_dt
+ const struct pipe_draw_indirect_info *indirect) assert_dt
{
- struct fd_context *ctx = batch->ctx;
+ struct fd_context *ctx = batch->ctx;
- /* NOTE: needs to be before resource_written(batch->query_buf), otherwise
- * query_buf may not be created yet.
- */
- fd_batch_update_queries(batch);
+ /* NOTE: needs to be before resource_written(batch->query_buf), otherwise
+ * query_buf may not be created yet.
+ */
+ fd_batch_update_queries(batch);
- /*
- * Figure out the buffers/features we need:
- */
+ /*
+ * Figure out the buffers/features we need:
+ */
- fd_screen_lock(ctx->screen);
+ fd_screen_lock(ctx->screen);
- if (ctx->dirty & FD_DIRTY_RESOURCE)
- batch_draw_tracking_for_dirty_bits(batch);
+ if (ctx->dirty & FD_DIRTY_RESOURCE)
+ batch_draw_tracking_for_dirty_bits(batch);
- /* Mark index buffer as being read */
- if (info->index_size)
- resource_read(batch, info->index.resource);
+ /* Mark index buffer as being read */
+ if (info->index_size)
+ resource_read(batch, info->index.resource);
- /* Mark indirect draw buffer as being read */
- if (indirect) {
- if (indirect->buffer)
- resource_read(batch, indirect->buffer);
- if (indirect->count_from_stream_output)
- resource_read(batch, fd_stream_output_target(indirect->count_from_stream_output)->offset_buf);
- }
+ /* Mark indirect draw buffer as being read */
+ if (indirect) {
+ if (indirect->buffer)
+ resource_read(batch, indirect->buffer);
+ if (indirect->count_from_stream_output)
+ resource_read(
+ batch, fd_stream_output_target(indirect->count_from_stream_output)
+ ->offset_buf);
+ }
- resource_written(batch, batch->query_buf);
+ resource_written(batch, batch->query_buf);
- list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node)
- resource_written(batch, aq->prsc);
+ list_for_each_entry (struct fd_acc_query, aq, &ctx->acc_active_queries, node)
+ resource_written(batch, aq->prsc);
- fd_screen_unlock(ctx->screen);
+ fd_screen_unlock(ctx->screen);
}
static void
update_draw_stats(struct fd_context *ctx, const struct pipe_draw_info *info,
- const struct pipe_draw_start_count *draws, unsigned num_draws)
- assert_dt
+ const struct pipe_draw_start_count *draws,
+ unsigned num_draws) assert_dt
{
- ctx->stats.draw_calls++;
-
- if (ctx->screen->gpu_id < 600) {
- /* Counting prims in sw doesn't work for GS and tesselation. For older
- * gens we don't have those stages and don't have the hw counters enabled,
- * so keep the count accurate for non-patch geometry.
- */
- unsigned prims = 0;
- if ((info->mode != PIPE_PRIM_PATCHES) &&
- (info->mode != PIPE_PRIM_MAX)) {
- for (unsigned i = 0; i < num_draws; i++) {
- prims += u_reduced_prims_for_vertices(info->mode, draws[i].count);
- }
- }
-
- ctx->stats.prims_generated += prims;
-
- if (ctx->streamout.num_targets > 0) {
- /* Clip the prims we're writing to the size of the SO buffers. */
- enum pipe_prim_type tf_prim = u_decomposed_prim(info->mode);
- unsigned verts_written = u_vertices_for_prims(tf_prim, prims);
- unsigned remaining_vert_space = ctx->streamout.max_tf_vtx - ctx->streamout.verts_written;
- if (verts_written > remaining_vert_space) {
- verts_written = remaining_vert_space;
- u_trim_pipe_prim(tf_prim, &remaining_vert_space);
- }
- ctx->streamout.verts_written += verts_written;
-
- ctx->stats.prims_emitted += u_reduced_prims_for_vertices(tf_prim, verts_written);
- }
- }
+ ctx->stats.draw_calls++;
+
+ if (ctx->screen->gpu_id < 600) {
+ /* Counting prims in sw doesn't work for GS and tessellation. For older
+ * gens we don't have those stages and don't have the hw counters enabled,
+ * so keep the count accurate for non-patch geometry.
+ */
+ unsigned prims = 0;
+ if ((info->mode != PIPE_PRIM_PATCHES) && (info->mode != PIPE_PRIM_MAX)) {
+ for (unsigned i = 0; i < num_draws; i++) {
+ prims += u_reduced_prims_for_vertices(info->mode, draws[i].count);
+ }
+ }
+
+ ctx->stats.prims_generated += prims;
+
+ if (ctx->streamout.num_targets > 0) {
+ /* Clip the prims we're writing to the size of the SO buffers. */
+ enum pipe_prim_type tf_prim = u_decomposed_prim(info->mode);
+ unsigned verts_written = u_vertices_for_prims(tf_prim, prims);
+ unsigned remaining_vert_space =
+ ctx->streamout.max_tf_vtx - ctx->streamout.verts_written;
+ if (verts_written > remaining_vert_space) {
+ verts_written = remaining_vert_space;
+ u_trim_pipe_prim(tf_prim, &remaining_vert_space);
+ }
+ ctx->streamout.verts_written += verts_written;
+
+ ctx->stats.prims_emitted +=
+ u_reduced_prims_for_vertices(tf_prim, verts_written);
+ }
+ }
}
static void
fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
- const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count *draws,
- unsigned num_draws)
- in_dt
+ const struct pipe_draw_indirect_info *indirect,
+ const struct pipe_draw_start_count *draws, unsigned num_draws) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
-
- /* for debugging problems with indirect draw, it is convenient
- * to be able to emulate it, to determine if game is feeding us
- * bogus data:
- */
- if (indirect && indirect->buffer && FD_DBG(NOINDR)) {
- /* num_draws is only applicable for direct draws: */
- assert(num_draws == 1);
- util_draw_indirect(pctx, info, indirect);
- return;
- }
-
- /* TODO: push down the region versions into the tiles */
- if (!fd_render_condition_check(pctx))
- return;
-
- /* emulate unsupported primitives: */
- if (!fd_supported_prim(ctx, info->mode)) {
- if (ctx->streamout.num_targets > 0)
- mesa_loge("stream-out with emulated prims");
- util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer);
- util_primconvert_draw_vbo(ctx->primconvert, info, indirect, draws, num_draws);
- return;
- }
-
- /* Upload a user index buffer. */
- struct pipe_resource *indexbuf = NULL;
- unsigned index_offset = 0;
- struct pipe_draw_info new_info;
- if (info->index_size) {
- if (info->has_user_indices) {
- if (num_draws > 1) {
- util_draw_multi(pctx, info, indirect, draws, num_draws);
- return;
- }
- if (!util_upload_index_buffer(pctx, info, &draws[0],
- &indexbuf, &index_offset, 4))
- return;
- new_info = *info;
- new_info.index.resource = indexbuf;
- new_info.has_user_indices = false;
- info = &new_info;
- } else {
- indexbuf = info->index.resource;
- }
- }
-
- if ((ctx->streamout.num_targets > 0) && (num_draws > 1)) {
- util_draw_multi(pctx, info, indirect, draws, num_draws);
- return;
- }
-
- struct fd_batch *batch = fd_context_batch(ctx);
-
- if (ctx->in_discard_blit) {
- fd_batch_reset(batch);
- fd_context_all_dirty(ctx);
- }
-
- batch_draw_tracking(batch, info, indirect);
-
- while (unlikely(!fd_batch_lock_submit(batch))) {
- /* The current batch was flushed in batch_draw_tracking()
- * so start anew. We know this won't happen a second time
- * since we are dealing with a fresh batch:
- */
- fd_batch_reference(&batch, NULL);
- batch = fd_context_batch(ctx);
- batch_draw_tracking(batch, info, indirect);
- assert(ctx->batch == batch);
- }
-
- batch->blit = ctx->in_discard_blit;
- batch->back_blit = ctx->in_shadow;
- batch->num_draws++;
-
- /* Clearing last_fence must come after the batch dependency tracking
- * (resource_read()/resource_written()), as that can trigger a flush,
- * re-populating last_fence
- */
- fd_fence_ref(&ctx->last_fence, NULL);
-
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- DBG("%p: %ux%u num_draws=%u (%s/%s)", batch,
- pfb->width, pfb->height, batch->num_draws,
- util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
- util_format_short_name(pipe_surface_format(pfb->zsbuf)));
-
- batch->cost += ctx->draw_cost;
-
- for (unsigned i = 0; i < num_draws; i++) {
- if (ctx->draw_vbo(ctx, info, indirect, &draws[i], index_offset))
- batch->needs_flush = true;
-
- batch->num_vertices += draws[i].count * info->instance_count;
- }
-
- if (unlikely(ctx->stats_users > 0))
- update_draw_stats(ctx, info, draws, num_draws);
-
- for (unsigned i = 0; i < ctx->streamout.num_targets; i++) {
- assert(num_draws == 1);
- ctx->streamout.offsets[i] += draws[0].count;
- }
-
- if (FD_DBG(DDRAW))
- fd_context_all_dirty(ctx);
-
- fd_batch_unlock_submit(batch);
- fd_batch_check_size(batch);
- fd_batch_reference(&batch, NULL);
-
- if (info == &new_info)
- pipe_resource_reference(&indexbuf, NULL);
+ struct fd_context *ctx = fd_context(pctx);
+
+ /* for debugging problems with indirect draw, it is convenient
+ * to be able to emulate it, to determine if game is feeding us
+ * bogus data:
+ */
+ if (indirect && indirect->buffer && FD_DBG(NOINDR)) {
+ /* num_draws is only applicable for direct draws: */
+ assert(num_draws == 1);
+ util_draw_indirect(pctx, info, indirect);
+ return;
+ }
+
+ /* TODO: push down the region versions into the tiles */
+ if (!fd_render_condition_check(pctx))
+ return;
+
+ /* emulate unsupported primitives: */
+ if (!fd_supported_prim(ctx, info->mode)) {
+ if (ctx->streamout.num_targets > 0)
+ mesa_loge("stream-out with emulated prims");
+ util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer);
+ util_primconvert_draw_vbo(ctx->primconvert, info, indirect, draws,
+ num_draws);
+ return;
+ }
+
+ /* Upload a user index buffer. */
+ struct pipe_resource *indexbuf = NULL;
+ unsigned index_offset = 0;
+ struct pipe_draw_info new_info;
+ if (info->index_size) {
+ if (info->has_user_indices) {
+ if (num_draws > 1) {
+ util_draw_multi(pctx, info, indirect, draws, num_draws);
+ return;
+ }
+ if (!util_upload_index_buffer(pctx, info, &draws[0], &indexbuf,
+ &index_offset, 4))
+ return;
+ new_info = *info;
+ new_info.index.resource = indexbuf;
+ new_info.has_user_indices = false;
+ info = &new_info;
+ } else {
+ indexbuf = info->index.resource;
+ }
+ }
+
+ if ((ctx->streamout.num_targets > 0) && (num_draws > 1)) {
+ util_draw_multi(pctx, info, indirect, draws, num_draws);
+ return;
+ }
+
+ struct fd_batch *batch = fd_context_batch(ctx);
+
+ if (ctx->in_discard_blit) {
+ fd_batch_reset(batch);
+ fd_context_all_dirty(ctx);
+ }
+
+ batch_draw_tracking(batch, info, indirect);
+
+ while (unlikely(!fd_batch_lock_submit(batch))) {
+ /* The current batch was flushed in batch_draw_tracking()
+ * so start anew. We know this won't happen a second time
+ * since we are dealing with a fresh batch:
+ */
+ fd_batch_reference(&batch, NULL);
+ batch = fd_context_batch(ctx);
+ batch_draw_tracking(batch, info, indirect);
+ assert(ctx->batch == batch);
+ }
+
+ batch->blit = ctx->in_discard_blit;
+ batch->back_blit = ctx->in_shadow;
+ batch->num_draws++;
+
+ /* Clearing last_fence must come after the batch dependency tracking
+ * (resource_read()/resource_written()), as that can trigger a flush,
+ * re-populating last_fence
+ */
+ fd_fence_ref(&ctx->last_fence, NULL);
+
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ DBG("%p: %ux%u num_draws=%u (%s/%s)", batch, pfb->width, pfb->height,
+ batch->num_draws,
+ util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
+ util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+
+ batch->cost += ctx->draw_cost;
+
+ for (unsigned i = 0; i < num_draws; i++) {
+ if (ctx->draw_vbo(ctx, info, indirect, &draws[i], index_offset))
+ batch->needs_flush = true;
+
+ batch->num_vertices += draws[i].count * info->instance_count;
+ }
+
+ if (unlikely(ctx->stats_users > 0))
+ update_draw_stats(ctx, info, draws, num_draws);
+
+ for (unsigned i = 0; i < ctx->streamout.num_targets; i++) {
+ assert(num_draws == 1);
+ ctx->streamout.offsets[i] += draws[0].count;
+ }
+
+ if (FD_DBG(DDRAW))
+ fd_context_all_dirty(ctx);
+
+ fd_batch_unlock_submit(batch);
+ fd_batch_check_size(batch);
+ fd_batch_reference(&batch, NULL);
+
+ if (info == &new_info)
+ pipe_resource_reference(&indexbuf, NULL);
}
static void
-batch_clear_tracking(struct fd_batch *batch, unsigned buffers)
- assert_dt
+batch_clear_tracking(struct fd_batch *batch, unsigned buffers) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- unsigned cleared_buffers;
-
- /* pctx->clear() is only for full-surface clears, so scissor is
- * equivalent to having GL_SCISSOR_TEST disabled:
- */
- batch->max_scissor.minx = 0;
- batch->max_scissor.miny = 0;
- batch->max_scissor.maxx = pfb->width;
- batch->max_scissor.maxy = pfb->height;
-
- /* for bookkeeping about which buffers have been cleared (and thus
- * can fully or partially skip mem2gmem) we need to ignore buffers
- * that have already had a draw, in case apps do silly things like
- * clear after draw (ie. if you only clear the color buffer, but
- * something like alpha-test causes side effects from the draw in
- * the depth buffer, etc)
- */
- cleared_buffers = buffers & (FD_BUFFER_ALL & ~batch->restore);
- batch->cleared |= buffers;
- batch->invalidated |= cleared_buffers;
-
- batch->resolve |= buffers;
- batch->needs_flush = true;
-
- fd_screen_lock(ctx->screen);
-
- if (buffers & PIPE_CLEAR_COLOR)
- for (unsigned i = 0; i < pfb->nr_cbufs; i++)
- if (buffers & (PIPE_CLEAR_COLOR0 << i))
- resource_written(batch, pfb->cbufs[i]->texture);
-
- if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
- resource_written(batch, pfb->zsbuf->texture);
- batch->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
- }
-
- resource_written(batch, batch->query_buf);
-
- list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node)
- resource_written(batch, aq->prsc);
-
- fd_screen_unlock(ctx->screen);
+ struct fd_context *ctx = batch->ctx;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ unsigned cleared_buffers;
+
+ /* pctx->clear() is only for full-surface clears, so scissor is
+ * equivalent to having GL_SCISSOR_TEST disabled:
+ */
+ batch->max_scissor.minx = 0;
+ batch->max_scissor.miny = 0;
+ batch->max_scissor.maxx = pfb->width;
+ batch->max_scissor.maxy = pfb->height;
+
+ /* for bookkeeping about which buffers have been cleared (and thus
+ * can fully or partially skip mem2gmem) we need to ignore buffers
+ * that have already had a draw, in case apps do silly things like
+ * clear after draw (ie. if you only clear the color buffer, but
+ * something like alpha-test causes side effects from the draw in
+ * the depth buffer, etc)
+ */
+ cleared_buffers = buffers & (FD_BUFFER_ALL & ~batch->restore);
+ batch->cleared |= buffers;
+ batch->invalidated |= cleared_buffers;
+
+ batch->resolve |= buffers;
+ batch->needs_flush = true;
+
+ fd_screen_lock(ctx->screen);
+
+ if (buffers & PIPE_CLEAR_COLOR)
+ for (unsigned i = 0; i < pfb->nr_cbufs; i++)
+ if (buffers & (PIPE_CLEAR_COLOR0 << i))
+ resource_written(batch, pfb->cbufs[i]->texture);
+
+ if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
+ resource_written(batch, pfb->zsbuf->texture);
+ batch->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
+ }
+
+ resource_written(batch, batch->query_buf);
+
+ list_for_each_entry (struct fd_acc_query, aq, &ctx->acc_active_queries, node)
+ resource_written(batch, aq->prsc);
+
+ fd_screen_unlock(ctx->screen);
}
static void
fd_clear(struct pipe_context *pctx, unsigned buffers,
- const struct pipe_scissor_state *scissor_state,
- const union pipe_color_union *color, double depth,
- unsigned stencil)
- in_dt
+ const struct pipe_scissor_state *scissor_state,
+ const union pipe_color_union *color, double depth,
+ unsigned stencil) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
-
- /* TODO: push down the region versions into the tiles */
- if (!fd_render_condition_check(pctx))
- return;
-
- struct fd_batch *batch = fd_context_batch(ctx);
-
- if (ctx->in_discard_blit) {
- fd_batch_reset(batch);
- fd_context_all_dirty(ctx);
- }
-
- batch_clear_tracking(batch, buffers);
-
- while (unlikely(!fd_batch_lock_submit(batch))) {
- /* The current batch was flushed in batch_clear_tracking()
- * so start anew. We know this won't happen a second time
- * since we are dealing with a fresh batch:
- */
- fd_batch_reference(&batch, NULL);
- batch = fd_context_batch(ctx);
- batch_clear_tracking(batch, buffers);
- assert(ctx->batch == batch);
- }
-
- /* Clearing last_fence must come after the batch dependency tracking
- * (resource_read()/resource_written()), as that can trigger a flush,
- * re-populating last_fence
- */
- fd_fence_ref(&ctx->last_fence, NULL);
-
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- DBG("%p: %x %ux%u depth=%f, stencil=%u (%s/%s)", batch, buffers,
- pfb->width, pfb->height, depth, stencil,
- util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
- util_format_short_name(pipe_surface_format(pfb->zsbuf)));
-
- /* if per-gen backend doesn't implement ctx->clear() generic
- * blitter clear:
- */
- bool fallback = true;
-
- if (ctx->clear) {
- fd_batch_update_queries(batch);
-
- if (ctx->clear(ctx, buffers, color, depth, stencil)) {
- if (FD_DBG(DCLEAR))
- fd_context_all_dirty(ctx);
-
- fallback = false;
- }
- }
-
- fd_batch_unlock_submit(batch);
- fd_batch_check_size(batch);
-
- if (fallback) {
- fd_blitter_clear(pctx, buffers, color, depth, stencil);
- }
-
- fd_batch_reference(&batch, NULL);
+ struct fd_context *ctx = fd_context(pctx);
+
+ /* TODO: push down the region versions into the tiles */
+ if (!fd_render_condition_check(pctx))
+ return;
+
+ struct fd_batch *batch = fd_context_batch(ctx);
+
+ if (ctx->in_discard_blit) {
+ fd_batch_reset(batch);
+ fd_context_all_dirty(ctx);
+ }
+
+ batch_clear_tracking(batch, buffers);
+
+ while (unlikely(!fd_batch_lock_submit(batch))) {
+ /* The current batch was flushed in batch_clear_tracking()
+ * so start anew. We know this won't happen a second time
+ * since we are dealing with a fresh batch:
+ */
+ fd_batch_reference(&batch, NULL);
+ batch = fd_context_batch(ctx);
+ batch_clear_tracking(batch, buffers);
+ assert(ctx->batch == batch);
+ }
+
+ /* Clearing last_fence must come after the batch dependency tracking
+ * (resource_read()/resource_written()), as that can trigger a flush,
+ * re-populating last_fence
+ */
+ fd_fence_ref(&ctx->last_fence, NULL);
+
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ DBG("%p: %x %ux%u depth=%f, stencil=%u (%s/%s)", batch, buffers, pfb->width,
+ pfb->height, depth, stencil,
+ util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
+ util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+
+ /* if per-gen backend doesn't implement ctx->clear() generic
+ * blitter clear:
+ */
+ bool fallback = true;
+
+ if (ctx->clear) {
+ fd_batch_update_queries(batch);
+
+ if (ctx->clear(ctx, buffers, color, depth, stencil)) {
+ if (FD_DBG(DCLEAR))
+ fd_context_all_dirty(ctx);
+
+ fallback = false;
+ }
+ }
+
+ fd_batch_unlock_submit(batch);
+ fd_batch_check_size(batch);
+
+ if (fallback) {
+ fd_blitter_clear(pctx, buffers, color, depth, stencil);
+ }
+
+ fd_batch_reference(&batch, NULL);
}
static void
fd_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
- const union pipe_color_union *color,
- unsigned x, unsigned y, unsigned w, unsigned h,
- bool render_condition_enabled)
+ const union pipe_color_union *color, unsigned x,
+ unsigned y, unsigned w, unsigned h,
+ bool render_condition_enabled)
{
- DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h);
+ DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h);
}
static void
fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
- unsigned buffers, double depth, unsigned stencil,
- unsigned x, unsigned y, unsigned w, unsigned h,
- bool render_condition_enabled)
+ unsigned buffers, double depth, unsigned stencil,
+ unsigned x, unsigned y, unsigned w, unsigned h,
+ bool render_condition_enabled)
{
- DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u",
- buffers, depth, stencil, x, y, w, h);
+ DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u",
+ buffers, depth, stencil, x, y, w, h);
}
static void
-fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
- in_dt
+fd_launch_grid(struct pipe_context *pctx,
+ const struct pipe_grid_info *info) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- const struct fd_shaderbuf_stateobj *so = &ctx->shaderbuf[PIPE_SHADER_COMPUTE];
- struct fd_batch *batch, *save_batch = NULL;
+ struct fd_context *ctx = fd_context(pctx);
+ const struct fd_shaderbuf_stateobj *so =
+ &ctx->shaderbuf[PIPE_SHADER_COMPUTE];
+ struct fd_batch *batch, *save_batch = NULL;
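+
+ /* compute jobs get their own batch; stash the current render batch and
+  * restore it below once the grid has been flushed:
+  */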
- batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
- fd_batch_reference(&save_batch, ctx->batch);
- fd_batch_reference(&ctx->batch, batch);
- fd_context_all_dirty(ctx);
+ batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
+ fd_batch_reference(&save_batch, ctx->batch);
+ fd_batch_reference(&ctx->batch, batch);
+ fd_context_all_dirty(ctx);
- fd_screen_lock(ctx->screen);
+ fd_screen_lock(ctx->screen);
- /* Mark SSBOs */
- u_foreach_bit (i, so->enabled_mask & so->writable_mask)
- resource_written(batch, so->sb[i].buffer);
+ /* Mark SSBOs */
+ u_foreach_bit (i, so->enabled_mask & so->writable_mask)
+ resource_written(batch, so->sb[i].buffer);
- u_foreach_bit (i, so->enabled_mask & ~so->writable_mask)
- resource_read(batch, so->sb[i].buffer);
+ u_foreach_bit (i, so->enabled_mask & ~so->writable_mask)
+ resource_read(batch, so->sb[i].buffer);
- u_foreach_bit(i, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask) {
- struct pipe_image_view *img =
- &ctx->shaderimg[PIPE_SHADER_COMPUTE].si[i];
- if (img->access & PIPE_IMAGE_ACCESS_WRITE)
- resource_written(batch, img->resource);
- else
- resource_read(batch, img->resource);
- }
+ u_foreach_bit (i, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask) {
+ struct pipe_image_view *img = &ctx->shaderimg[PIPE_SHADER_COMPUTE].si[i];
+ if (img->access & PIPE_IMAGE_ACCESS_WRITE)
+ resource_written(batch, img->resource);
+ else
+ resource_read(batch, img->resource);
+ }
- /* UBO's are read */
- u_foreach_bit(i, ctx->constbuf[PIPE_SHADER_COMPUTE].enabled_mask)
- resource_read(batch, ctx->constbuf[PIPE_SHADER_COMPUTE].cb[i].buffer);
+ /* UBO's are read */
+ u_foreach_bit (i, ctx->constbuf[PIPE_SHADER_COMPUTE].enabled_mask)
+ resource_read(batch, ctx->constbuf[PIPE_SHADER_COMPUTE].cb[i].buffer);
- /* Mark textures as being read */
- u_foreach_bit(i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
- resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);
+ /* Mark textures as being read */
+ u_foreach_bit (i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
+ resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);
- /* For global buffers, we don't really know if read or written, so assume
- * the worst:
- */
- u_foreach_bit(i, ctx->global_bindings.enabled_mask)
- resource_written(batch, ctx->global_bindings.buf[i]);
+ /* For global buffers, we don't really know if read or written, so assume
+ * the worst:
+ */
+ u_foreach_bit (i, ctx->global_bindings.enabled_mask)
+ resource_written(batch, ctx->global_bindings.buf[i]);
- if (info->indirect)
- resource_read(batch, info->indirect);
+ if (info->indirect)
+ resource_read(batch, info->indirect);
- fd_screen_unlock(ctx->screen);
+ fd_screen_unlock(ctx->screen);
- batch->needs_flush = true;
- ctx->launch_grid(ctx, info);
+ batch->needs_flush = true;
+ ctx->launch_grid(ctx, info);
- fd_batch_flush(batch);
+ fd_batch_flush(batch);
- fd_batch_reference(&ctx->batch, save_batch);
- fd_context_all_dirty(ctx);
- fd_batch_reference(&save_batch, NULL);
- fd_batch_reference(&batch, NULL);
+ fd_batch_reference(&ctx->batch, save_batch);
+ fd_context_all_dirty(ctx);
+ fd_batch_reference(&save_batch, NULL);
+ fd_batch_reference(&batch, NULL);
}
void
fd_draw_init(struct pipe_context *pctx)
{
- pctx->draw_vbo = fd_draw_vbo;
- pctx->clear = fd_clear;
- pctx->clear_render_target = fd_clear_render_target;
- pctx->clear_depth_stencil = fd_clear_depth_stencil;
-
- if (has_compute(fd_screen(pctx->screen))) {
- pctx->launch_grid = fd_launch_grid;
- }
+ pctx->draw_vbo = fd_draw_vbo;
+ pctx->clear = fd_clear;
+ pctx->clear_render_target = fd_clear_render_target;
+ pctx->clear_depth_stencil = fd_clear_depth_stencil;
+
+ if (has_compute(fd_screen(pctx->screen))) {
+ pctx->launch_grid = fd_launch_grid;
+ }
}
#ifndef FREEDRENO_DRAW_H_
#define FREEDRENO_DRAW_H_
-#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"
static inline void
fd_draw(struct fd_batch *batch, struct fd_ringbuffer *ring,
- enum pc_di_primtype primtype,
- enum pc_di_vis_cull_mode vismode,
- enum pc_di_src_sel src_sel, uint32_t count,
- uint8_t instances,
- enum pc_di_index_size idx_type,
- uint32_t idx_size, uint32_t idx_offset,
- struct pipe_resource *idx_buffer)
+ enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
+ enum pc_di_src_sel src_sel, uint32_t count, uint8_t instances,
+ enum pc_di_index_size idx_type, uint32_t idx_size, uint32_t idx_offset,
+ struct pipe_resource *idx_buffer)
{
- /* for debug after a lock up, write a unique counter value
- * to scratch7 for each draw, to make it easier to match up
- * register dumps to cmdstream. The combination of IB
- * (scratch6) and DRAW is enough to "triangulate" the
- * particular draw that caused lockup.
- */
- emit_marker(ring, 7);
-
- if (is_a3xx_p0(batch->ctx->screen)) {
- /* dummy-draw workaround: */
- OUT_PKT3(ring, CP_DRAW_INDX, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
- INDEX_SIZE_IGN, USE_VISIBILITY, 0));
- OUT_RING(ring, 0); /* NumIndices */
-
- /* ugg, hard-code register offset to avoid pulling in the
- * a3xx register headers into something #included from a2xx
- */
- OUT_PKT0(ring, 0x2206, 1); /* A3XX_HLSQ_CONST_VSPRESV_RANGE_REG */
- OUT_RING(ring, 0);
- }
-
- if (is_a20x(batch->ctx->screen)) {
- /* a20x has a different draw command for drawing with binning data
- * note: if we do patching we will have to insert a NOP
- *
- * binning data is is 1 byte/vertex (8x8x4 bin position of vertex)
- * base ptr set by the CP_SET_DRAW_INIT_FLAGS command
- *
- * TODO: investigate the faceness_cull_select parameter to see how
- * it is used with hw binning to use "faceness" bits
- */
- uint32_t size = 2;
- if (vismode)
- size += 2;
- if (idx_buffer)
- size += 2;
-
- BEGIN_RING(ring, size+1);
- if (vismode)
- util_dynarray_append(&batch->draw_patches, uint32_t*, ring->cur);
-
- OUT_PKT3(ring, vismode ? CP_DRAW_INDX_BIN : CP_DRAW_INDX, size);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, DRAW_A20X(primtype, DI_FACE_CULL_NONE, src_sel,
- idx_type, vismode, vismode, count));
- if (vismode == USE_VISIBILITY) {
- OUT_RING(ring, batch->num_vertices);
- OUT_RING(ring, count);
- }
- } else {
- OUT_PKT3(ring, CP_DRAW_INDX, idx_buffer ? 5 : 3);
- OUT_RING(ring, 0x00000000); /* viz query info. */
- if (vismode == USE_VISIBILITY) {
- /* leave vis mode blank for now, it will be patched up when
- * we know if we are binning or not
- */
- OUT_RINGP(ring, DRAW(primtype, src_sel, idx_type, 0, instances),
- &batch->draw_patches);
- } else {
- OUT_RING(ring, DRAW(primtype, src_sel, idx_type, vismode, instances));
- }
- OUT_RING(ring, count); /* NumIndices */
- }
-
- if (idx_buffer) {
- OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
- OUT_RING (ring, idx_size);
- }
-
- emit_marker(ring, 7);
-
- fd_reset_wfi(batch);
+ /* for debug after a lock up, write a unique counter value
+ * to scratch7 for each draw, to make it easier to match up
+ * register dumps to cmdstream. The combination of IB
+ * (scratch6) and DRAW is enough to "triangulate" the
+ * particular draw that caused lockup.
+ */
+ emit_marker(ring, 7);
+
+ if (is_a3xx_p0(batch->ctx->screen)) {
+ /* dummy-draw workaround: */
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, INDEX_SIZE_IGN,
+ USE_VISIBILITY, 0));
+ OUT_RING(ring, 0); /* NumIndices */
+
+ /* ugg, hard-code register offset to avoid pulling in the
+ * a3xx register headers into something #included from a2xx
+ */
+ OUT_PKT0(ring, 0x2206, 1); /* A3XX_HLSQ_CONST_VSPRESV_RANGE_REG */
+ OUT_RING(ring, 0);
+ }
+
+ if (is_a20x(batch->ctx->screen)) {
+ /* a20x has a different draw command for drawing with binning data
+ * note: if we do patching we will have to insert a NOP
+ *
+ * binning data is 1 byte/vertex (8x8x4 bin position of vertex)
+ * base ptr set by the CP_SET_DRAW_INIT_FLAGS command
+ *
+ * TODO: investigate the faceness_cull_select parameter to see how
+ * it is used with hw binning to use "faceness" bits
+ */
+ uint32_t size = 2;
+ if (vismode)
+ size += 2;
+ if (idx_buffer)
+ size += 2;
+
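+ /* reserve the whole packet (+1 dword for the pkt3 header) up-front,
+  * so the ring->cur recorded below for patching is not invalidated by
+  * a ringbuffer grow:
+  */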
+ BEGIN_RING(ring, size + 1);
+ if (vismode)
+ util_dynarray_append(&batch->draw_patches, uint32_t *, ring->cur);
+
+ OUT_PKT3(ring, vismode ? CP_DRAW_INDX_BIN : CP_DRAW_INDX, size);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW_A20X(primtype, DI_FACE_CULL_NONE, src_sel, idx_type,
+ vismode, vismode, count));
+ if (vismode == USE_VISIBILITY) {
+ OUT_RING(ring, batch->num_vertices);
+ OUT_RING(ring, count);
+ }
+ } else {
+ OUT_PKT3(ring, CP_DRAW_INDX, idx_buffer ? 5 : 3);
+ OUT_RING(ring, 0x00000000); /* viz query info. */
+ if (vismode == USE_VISIBILITY) {
+ /* leave vis mode blank for now, it will be patched up when
+ * we know if we are binning or not
+ */
+ OUT_RINGP(ring, DRAW(primtype, src_sel, idx_type, 0, instances),
+ &batch->draw_patches);
+ } else {
+ OUT_RING(ring, DRAW(primtype, src_sel, idx_type, vismode, instances));
+ }
+ OUT_RING(ring, count); /* NumIndices */
+ }
+
+ if (idx_buffer) {
+ OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
+ OUT_RING(ring, idx_size);
+ }
+
+ emit_marker(ring, 7);
+
+ fd_reset_wfi(batch);
}
-
static inline enum pc_di_index_size
size2indextype(unsigned index_size)
{
- switch (index_size) {
- case 1: return INDEX_SIZE_8_BIT;
- case 2: return INDEX_SIZE_16_BIT;
- case 4: return INDEX_SIZE_32_BIT;
- }
- DBG("unsupported index size: %d", index_size);
- assert(0);
- return INDEX_SIZE_IGN;
+ switch (index_size) {
+ case 1:
+ return INDEX_SIZE_8_BIT;
+ case 2:
+ return INDEX_SIZE_16_BIT;
+ case 4:
+ return INDEX_SIZE_32_BIT;
+ }
+ DBG("unsupported index size: %d", index_size);
+ assert(0);
+ return INDEX_SIZE_IGN;
}
/* this is same for a2xx/a3xx, so split into helper: */
static inline void
fd_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
- enum pc_di_primtype primtype,
- enum pc_di_vis_cull_mode vismode,
- const struct pipe_draw_info *info,
- const struct pipe_draw_start_count *draw,
- unsigned index_offset)
+ enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
+ const struct pipe_draw_info *info,
+ const struct pipe_draw_start_count *draw, unsigned index_offset)
{
- struct pipe_resource *idx_buffer = NULL;
- enum pc_di_index_size idx_type = INDEX_SIZE_IGN;
- enum pc_di_src_sel src_sel;
- uint32_t idx_size, idx_offset;
-
- if (info->index_size) {
- assert(!info->has_user_indices);
-
- idx_buffer = info->index.resource;
- idx_type = size2indextype(info->index_size);
- idx_size = info->index_size * draw->count;
- idx_offset = index_offset + draw->start * info->index_size;
- src_sel = DI_SRC_SEL_DMA;
- } else {
- idx_buffer = NULL;
- idx_type = INDEX_SIZE_IGN;
- idx_size = 0;
- idx_offset = 0;
- src_sel = DI_SRC_SEL_AUTO_INDEX;
- }
-
- fd_draw(batch, ring, primtype, vismode, src_sel,
- draw->count, info->instance_count - 1,
- idx_type, idx_size, idx_offset, idx_buffer);
+ struct pipe_resource *idx_buffer = NULL;
+ enum pc_di_index_size idx_type = INDEX_SIZE_IGN;
+ enum pc_di_src_sel src_sel;
+ uint32_t idx_size, idx_offset;
+
+ if (info->index_size) {
+ assert(!info->has_user_indices);
+
+ idx_buffer = info->index.resource;
+ idx_type = size2indextype(info->index_size);
+ idx_size = info->index_size * draw->count;
+ idx_offset = index_offset + draw->start * info->index_size;
+ src_sel = DI_SRC_SEL_DMA;
+ } else {
+ idx_buffer = NULL;
+ idx_type = INDEX_SIZE_IGN;
+ idx_size = 0;
+ idx_offset = 0;
+ src_sel = DI_SRC_SEL_AUTO_INDEX;
+ }
+
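+ /* note: the 'instances' field encodes instance count minus one, hence
+  * the instance_count - 1 here:
+  */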
+ fd_draw(batch, ring, primtype, vismode, src_sel, draw->count,
+ info->instance_count - 1, idx_type, idx_size, idx_offset,
+ idx_buffer);
}
#endif /* FREEDRENO_DRAW_H_ */
#include "util/os_file.h"
#include "util/u_inlines.h"
-#include "freedreno_fence.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
+#include "freedreno_fence.h"
#include "freedreno_util.h"
/* TODO: Use the interface drm/freedreno_drmif.h instead of calling directly */
#include <xf86drm.h>
struct pipe_fence_handle {
- struct pipe_reference reference;
-
- /* fence holds a weak reference to the batch until the batch is flushed,
- * at which point fd_fence_populate() is called and timestamp and possibly
- * fence_fd become valid and the week reference is dropped.
- *
- * Note that with u_threaded_context async flushes, if a fence is requested
- * by the frontend, the fence is initially created without a weak reference
- * to the batch, which is filled in later when fd_context_flush() is called
- * from the driver thread. In this case tc_token will be non-null, in
- * which case threaded_context_flush() should be called in fd_fence_finish()
- */
- struct fd_batch *batch;
-
- struct tc_unflushed_batch_token *tc_token;
- bool needs_signal;
-
- /* For threaded_context async flushes, we must wait on the fence, signalled
- * in fd_fence_populate(), to know that the rendering has been actually
- * flushed from the driver thread.
- *
- * The ready fence is created signaled for non-async-flush fences, and only
- * transitions once from unsignalled->signalled for async-flush fences
- */
- struct util_queue_fence ready;
-
- /* Note that a fence can outlive the ctx, so we can only assume this is a
- * valid ptr for unflushed fences. However we hold a reference to the
- * fence->pipe so that is safe to use after flushing.
- */
- struct fd_context *ctx;
- struct fd_pipe *pipe;
- struct fd_screen *screen;
- int fence_fd;
- uint32_t timestamp;
- uint32_t syncobj;
+ struct pipe_reference reference;
+
+ /* fence holds a weak reference to the batch until the batch is flushed,
+ * at which point fd_fence_populate() is called and timestamp and possibly
+ * fence_fd become valid and the weak reference is dropped.
+ *
+ * Note that with u_threaded_context async flushes, if a fence is requested
+ * by the frontend, the fence is initially created without a weak reference
+ * to the batch, which is filled in later when fd_context_flush() is called
+ * from the driver thread. In this case tc_token will be non-null, in
+ * which case threaded_context_flush() should be called in fd_fence_finish()
+ */
+ struct fd_batch *batch;
+
+ struct tc_unflushed_batch_token *tc_token;
+ bool needs_signal;
+
+ /* For threaded_context async flushes, we must wait on the fence, signalled
+ * in fd_fence_populate(), to know that the rendering has actually been
+ * flushed from the driver thread.
+ *
+ * The ready fence is created signaled for non-async-flush fences, and only
+ * transitions once from unsignalled->signalled for async-flush fences
+ */
+ struct util_queue_fence ready;
+
+ /* Note that a fence can outlive the ctx, so we can only assume this is a
+ * valid ptr for unflushed fences. However, we hold a reference to
+ * fence->pipe, so it is safe to use after flushing.
+ */
+ struct fd_context *ctx;
+ struct fd_pipe *pipe;
+ struct fd_screen *screen;
+ int fence_fd;
+ uint32_t timestamp;
+ uint32_t syncobj;
};
static bool
-fence_flush(struct pipe_context *pctx, struct pipe_fence_handle *fence, uint64_t timeout)
- /* NOTE: in the !fence_is_signalled() case we may be called from non-driver
- * thread, but we don't call fd_batch_flush() in that case
- */
- in_dt
+fence_flush(struct pipe_context *pctx, struct pipe_fence_handle *fence,
+ uint64_t timeout)
+ /* NOTE: in the !fence_is_signalled() case we may be called from non-driver
+ * thread, but we don't call fd_batch_flush() in that case
+ */
+ in_dt
{
- if (!util_queue_fence_is_signalled(&fence->ready)) {
- if (fence->tc_token) {
- threaded_context_flush(pctx, fence->tc_token,
- timeout == 0);
- }
-
- if (!timeout)
- return false;
-
- if (timeout == PIPE_TIMEOUT_INFINITE) {
- util_queue_fence_wait(&fence->ready);
- } else {
- int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
- if (!util_queue_fence_wait_timeout(&fence->ready, abs_timeout)) {
- return false;
- }
- }
-
- /* We've already waited for batch to be flushed and fd_fence_populate()
- * called:
- */
- assert(!fence->batch);
- return true;
- }
-
- if (fence->batch)
- fd_batch_flush(fence->batch);
-
- debug_assert(!fence->batch);
-
- return true;
+ if (!util_queue_fence_is_signalled(&fence->ready)) {
+ if (fence->tc_token) {
+ threaded_context_flush(pctx, fence->tc_token, timeout == 0);
+ }
+
+ if (!timeout)
+ return false;
+
+ if (timeout == PIPE_TIMEOUT_INFINITE) {
+ util_queue_fence_wait(&fence->ready);
+ } else {
+ int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
+ if (!util_queue_fence_wait_timeout(&fence->ready, abs_timeout)) {
+ return false;
+ }
+ }
+
+ /* We've already waited for batch to be flushed and fd_fence_populate()
+ * called:
+ */
+ assert(!fence->batch);
+ return true;
+ }
+
+ if (fence->batch)
+ fd_batch_flush(fence->batch);
+
+ debug_assert(!fence->batch);
+
+ return true;
}
-void fd_fence_populate(struct pipe_fence_handle *fence,
- uint32_t timestamp, int fence_fd)
+void
+fd_fence_populate(struct pipe_fence_handle *fence, uint32_t timestamp,
+ int fence_fd)
{
- if (!fence->batch)
- return;
- fence->timestamp = timestamp;
- fence->fence_fd = fence_fd;
- fence->batch = NULL;
-
- if (fence->needs_signal) {
- util_queue_fence_signal(&fence->ready);
- fence->needs_signal = false;
- }
+ if (!fence->batch)
+ return;
+ fence->timestamp = timestamp;
+ fence->fence_fd = fence_fd;
+ fence->batch = NULL;
+
+ if (fence->needs_signal) {
+ util_queue_fence_signal(&fence->ready);
+ fence->needs_signal = false;
+ }
}
-static void fd_fence_destroy(struct pipe_fence_handle *fence)
+static void
+fd_fence_destroy(struct pipe_fence_handle *fence)
{
- tc_unflushed_batch_token_reference(&fence->tc_token, NULL);
- if (fence->fence_fd != -1)
- close(fence->fence_fd);
- if (fence->syncobj)
- drmSyncobjDestroy(fd_device_fd(fence->screen->dev), fence->syncobj);
- fd_pipe_del(fence->pipe);
- FREE(fence);
+ tc_unflushed_batch_token_reference(&fence->tc_token, NULL);
+ if (fence->fence_fd != -1)
+ close(fence->fence_fd);
+ if (fence->syncobj)
+ drmSyncobjDestroy(fd_device_fd(fence->screen->dev), fence->syncobj);
+ fd_pipe_del(fence->pipe);
+ FREE(fence);
}
-void fd_fence_ref(struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *pfence)
+void
+fd_fence_ref(struct pipe_fence_handle **ptr, struct pipe_fence_handle *pfence)
{
- if (pipe_reference(&(*ptr)->reference, &pfence->reference))
- fd_fence_destroy(*ptr);
+ if (pipe_reference(&(*ptr)->reference, &pfence->reference))
+ fd_fence_destroy(*ptr);
- *ptr = pfence;
+ *ptr = pfence;
}
-bool fd_fence_finish(struct pipe_screen *pscreen,
- struct pipe_context *pctx,
- struct pipe_fence_handle *fence,
- uint64_t timeout)
+bool
+fd_fence_finish(struct pipe_screen *pscreen, struct pipe_context *pctx,
+ struct pipe_fence_handle *fence, uint64_t timeout)
{
- if (!fence_flush(pctx, fence, timeout))
- return false;
+ if (!fence_flush(pctx, fence, timeout))
+ return false;
- if (fence->fence_fd != -1) {
- int ret = sync_wait(fence->fence_fd, timeout / 1000000);
- return ret == 0;
- }
+ if (fence->fence_fd != -1) {
+ int ret = sync_wait(fence->fence_fd, timeout / 1000000);
+ return ret == 0;
+ }
- if (fd_pipe_wait_timeout(fence->pipe, fence->timestamp, timeout))
- return false;
+ if (fd_pipe_wait_timeout(fence->pipe, fence->timestamp, timeout))
+ return false;
- return true;
+ return true;
}
-static struct pipe_fence_handle * fence_create(struct fd_context *ctx,
- struct fd_batch *batch, uint32_t timestamp, int fence_fd, int syncobj)
+static struct pipe_fence_handle *
+fence_create(struct fd_context *ctx, struct fd_batch *batch, uint32_t timestamp,
+ int fence_fd, int syncobj)
{
- struct pipe_fence_handle *fence;
+ struct pipe_fence_handle *fence;
- fence = CALLOC_STRUCT(pipe_fence_handle);
- if (!fence)
- return NULL;
+ fence = CALLOC_STRUCT(pipe_fence_handle);
+ if (!fence)
+ return NULL;
- pipe_reference_init(&fence->reference, 1);
- util_queue_fence_init(&fence->ready);
+ pipe_reference_init(&fence->reference, 1);
+ util_queue_fence_init(&fence->ready);
- fence->ctx = ctx;
- fence->batch = batch;
- fence->pipe = fd_pipe_ref(ctx->pipe);
- fence->screen = ctx->screen;
- fence->timestamp = timestamp;
- fence->fence_fd = fence_fd;
- fence->syncobj = syncobj;
+ fence->ctx = ctx;
+ fence->batch = batch;
+ fence->pipe = fd_pipe_ref(ctx->pipe);
+ fence->screen = ctx->screen;
+ fence->timestamp = timestamp;
+ fence->fence_fd = fence_fd;
+ fence->syncobj = syncobj;
- return fence;
+ return fence;
}
-void fd_create_fence_fd(struct pipe_context *pctx,
- struct pipe_fence_handle **pfence, int fd,
- enum pipe_fd_type type)
+void
+fd_create_fence_fd(struct pipe_context *pctx, struct pipe_fence_handle **pfence,
+ int fd, enum pipe_fd_type type)
{
- struct fd_context *ctx = fd_context(pctx);
-
- switch (type) {
- case PIPE_FD_TYPE_NATIVE_SYNC:
- *pfence = fence_create(fd_context(pctx), NULL, 0, os_dupfd_cloexec(fd), 0);
- break;
- case PIPE_FD_TYPE_SYNCOBJ: {
- int ret;
- uint32_t syncobj;
-
- assert(ctx->screen->has_syncobj);
- ret = drmSyncobjFDToHandle(fd_device_fd(ctx->screen->dev), fd, &syncobj);
- if (!ret)
- close(fd);
-
- *pfence = fence_create(fd_context(pctx), NULL, 0, -1, syncobj);
- break;
- }
- default:
- unreachable("Unhandled fence type");
- }
+ struct fd_context *ctx = fd_context(pctx);
+
+ switch (type) {
+ case PIPE_FD_TYPE_NATIVE_SYNC:
+ *pfence =
+ fence_create(fd_context(pctx), NULL, 0, os_dupfd_cloexec(fd), 0);
+ break;
+ case PIPE_FD_TYPE_SYNCOBJ: {
+ int ret;
+ uint32_t syncobj;
+
+ assert(ctx->screen->has_syncobj);
+ ret = drmSyncobjFDToHandle(fd_device_fd(ctx->screen->dev), fd, &syncobj);
+ if (!ret)
+ close(fd);
+
+ *pfence = fence_create(fd_context(pctx), NULL, 0, -1, syncobj);
+ break;
+ }
+ default:
+ unreachable("Unhandled fence type");
+ }
}
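
A note on fd ownership in the two cases above: for PIPE_FD_TYPE_NATIVE_SYNC
the caller keeps its fd, so the fence stores a duplicate made with
os_dupfd_cloexec(); for PIPE_FD_TYPE_SYNCOBJ, drmSyncobjFDToHandle() returns
0 on success, after which the imported handle stands on its own and the
original fd is no longer needed, which is why close(fd) runs only when
ret == 0.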
-void fd_fence_server_sync(struct pipe_context *pctx,
- struct pipe_fence_handle *fence)
+void
+fd_fence_server_sync(struct pipe_context *pctx, struct pipe_fence_handle *fence)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- /* NOTE: we don't expect the combination of fence-fd + async-flush-fence,
- * so timeout==0 is ok here:
- */
- fence_flush(pctx, fence, 0);
+ /* NOTE: we don't expect the combination of fence-fd + async-flush-fence,
+ * so timeout==0 is ok here:
+ */
+ fence_flush(pctx, fence, 0);
- /* if not an external fence, then nothing more to do without preemption: */
- if (fence->fence_fd == -1)
- return;
+ /* if not an external fence, then nothing more to do without preemption: */
+ if (fence->fence_fd == -1)
+ return;
- if (sync_accumulate("freedreno", &ctx->in_fence_fd, fence->fence_fd)) {
- /* error */
- }
+ if (sync_accumulate("freedreno", &ctx->in_fence_fd, fence->fence_fd)) {
+ /* error */
+ }
}
-void fd_fence_server_signal(struct pipe_context *pctx,
- struct pipe_fence_handle *fence)
+void
+fd_fence_server_signal(struct pipe_context *pctx,
+ struct pipe_fence_handle *fence)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- if (fence->syncobj) {
- drmSyncobjSignal(fd_device_fd(ctx->screen->dev), &fence->syncobj, 1);
- }
+ if (fence->syncobj) {
+ drmSyncobjSignal(fd_device_fd(ctx->screen->dev), &fence->syncobj, 1);
+ }
}
-int fd_fence_get_fd(struct pipe_screen *pscreen,
- struct pipe_fence_handle *fence)
+int
+fd_fence_get_fd(struct pipe_screen *pscreen, struct pipe_fence_handle *fence)
{
- /* NOTE: in the deferred fence case, the pctx we want is the threaded-ctx
- * but if TC is not used, this will be null. Which is fine, we won't call
- * threaded_context_flush() in that case
- */
- fence_flush(&fence->ctx->tc->base, fence, PIPE_TIMEOUT_INFINITE);
- return os_dupfd_cloexec(fence->fence_fd);
+ /* NOTE: in the deferred fence case, the pctx we want is the threaded-ctx,
+ * but if TC is not used this will be null, which is fine: we won't call
+ * threaded_context_flush() in that case.
+ */
+ fence_flush(&fence->ctx->tc->base, fence, PIPE_TIMEOUT_INFINITE);
+ return os_dupfd_cloexec(fence->fence_fd);
}
-bool fd_fence_is_fd(struct pipe_fence_handle *fence)
+bool
+fd_fence_is_fd(struct pipe_fence_handle *fence)
{
- return fence->fence_fd != -1;
+ return fence->fence_fd != -1;
}
-struct pipe_fence_handle * fd_fence_create(struct fd_batch *batch)
+struct pipe_fence_handle *
+fd_fence_create(struct fd_batch *batch)
{
- return fence_create(batch->ctx, batch, 0, -1, 0);
+ return fence_create(batch->ctx, batch, 0, -1, 0);
}
void
fd_fence_set_batch(struct pipe_fence_handle *fence, struct fd_batch *batch)
{
- assert(!fence->batch);
- fence->batch = batch;
+ assert(!fence->batch);
+ fence->batch = batch;
}
struct pipe_fence_handle *
fd_fence_create_unflushed(struct pipe_context *pctx,
- struct tc_unflushed_batch_token *tc_token)
+ struct tc_unflushed_batch_token *tc_token)
{
- struct pipe_fence_handle *fence =
- fence_create(fd_context(pctx), NULL, 0, -1, 0);
- fence->needs_signal = true;
- util_queue_fence_reset(&fence->ready);
- tc_unflushed_batch_token_reference(&fence->tc_token, tc_token);
- return fence;
+ struct pipe_fence_handle *fence =
+ fence_create(fd_context(pctx), NULL, 0, -1, 0);
+ fence->needs_signal = true;
+ util_queue_fence_reset(&fence->ready);
+ tc_unflushed_batch_token_reference(&fence->tc_token, tc_token);
+ return fence;
}
#include "pipe/p_context.h"
-void fd_fence_populate(struct pipe_fence_handle *fence,
- uint32_t timestamp, int fence_fd);
+void fd_fence_populate(struct pipe_fence_handle *fence, uint32_t timestamp,
+ int fence_fd);
void fd_fence_ref(struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *pfence);
-bool fd_fence_finish(struct pipe_screen *pscreen,
- struct pipe_context *ctx,
- struct pipe_fence_handle *pfence,
- uint64_t timeout);
+ struct pipe_fence_handle *pfence);
+bool fd_fence_finish(struct pipe_screen *pscreen, struct pipe_context *ctx,
+ struct pipe_fence_handle *pfence, uint64_t timeout);
void fd_create_fence_fd(struct pipe_context *pctx,
- struct pipe_fence_handle **pfence, int fd,
- enum pipe_fd_type type);
+ struct pipe_fence_handle **pfence, int fd,
+ enum pipe_fd_type type);
void fd_fence_server_sync(struct pipe_context *pctx,
- struct pipe_fence_handle *fence);
+ struct pipe_fence_handle *fence);
void fd_fence_server_signal(struct pipe_context *ctx,
- struct pipe_fence_handle *fence);
+ struct pipe_fence_handle *fence);
int fd_fence_get_fd(struct pipe_screen *pscreen,
- struct pipe_fence_handle *pfence);
+ struct pipe_fence_handle *pfence);
bool fd_fence_is_fd(struct pipe_fence_handle *fence);
struct fd_batch;
-struct pipe_fence_handle * fd_fence_create(struct fd_batch *batch);
+struct pipe_fence_handle *fd_fence_create(struct fd_batch *batch);
-
-void fd_fence_set_batch(struct pipe_fence_handle *fence, struct fd_batch *batch);
+void fd_fence_set_batch(struct pipe_fence_handle *fence,
+ struct fd_batch *batch);
struct tc_unflushed_batch_token;
-struct pipe_fence_handle *fd_fence_create_unflushed(struct pipe_context *pctx,
- struct tc_unflushed_batch_token *tc_token);
+struct pipe_fence_handle *
+fd_fence_create_unflushed(struct pipe_context *pctx,
+ struct tc_unflushed_batch_token *tc_token);
#endif /* FREEDRENO_FENCE_H_ */
* Rob Clark <robclark@freedesktop.org>
*/
-#include "util/debug.h"
#include "pipe/p_state.h"
+#include "util/debug.h"
+#include "util/format/u_format.h"
#include "util/hash_table.h"
#include "util/u_dump.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
#include "util/u_inlines.h"
-#include "util/format/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "u_tracepoints.h"
-#include "freedreno_gmem.h"
#include "freedreno_context.h"
#include "freedreno_fence.h"
-#include "freedreno_resource.h"
+#include "freedreno_gmem.h"
#include "freedreno_query_hw.h"
+#include "freedreno_resource.h"
#include "freedreno_tracepoints.h"
#include "freedreno_util.h"
*/
#ifndef BIN_DEBUG
-# define BIN_DEBUG 0
+#define BIN_DEBUG 0
#endif
/*
*/
struct gmem_key {
- uint16_t minx, miny;
- uint16_t width, height;
- uint8_t gmem_page_align; /* alignment in multiples of 0x1000 to reduce key size */
- uint8_t nr_cbufs;
- uint8_t cbuf_cpp[MAX_RENDER_TARGETS];
- uint8_t zsbuf_cpp[2];
+ uint16_t minx, miny;
+ uint16_t width, height;
+ uint8_t
+ gmem_page_align; /* alignment in multiples of 0x1000 to reduce key size */
+ uint8_t nr_cbufs;
+ uint8_t cbuf_cpp[MAX_RENDER_TARGETS];
+ uint8_t zsbuf_cpp[2];
};
static uint32_t
gmem_key_hash(const void *_key)
{
- const struct gmem_key *key = _key;
- return _mesa_hash_data(key, sizeof(*key));
+ const struct gmem_key *key = _key;
+ return _mesa_hash_data(key, sizeof(*key));
}
static bool
gmem_key_equals(const void *_a, const void *_b)
{
- const struct gmem_key *a = _a;
- const struct gmem_key *b = _b;
- return memcmp(a, b, sizeof(*a)) == 0;
+ const struct gmem_key *a = _a;
+ const struct gmem_key *b = _b;
+ return memcmp(a, b, sizeof(*a)) == 0;
}
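
One subtlety in the hash/compare pair above: hashing and memcmp'ing
sizeof(*key) raw bytes is only sound if any compiler-inserted padding is
deterministic. That holds here because keys are allocated with rzalloc()
(see gmem_key_init() below), which zeroes the whole struct before the
fields are filled in.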
static void
dump_gmem_key(const struct gmem_key *key)
{
- printf("{ .minx=%u, .miny=%u, .width=%u, .height=%u",
- key->minx, key->miny, key->width, key->height);
- printf(", .gmem_page_align=%u, .nr_cbufs=%u",
- key->gmem_page_align, key->nr_cbufs);
- printf(", .cbuf_cpp = {");
- for (unsigned i = 0; i < ARRAY_SIZE(key->cbuf_cpp); i++)
- printf("%u,", key->cbuf_cpp[i]);
- printf("}, .zsbuf_cpp = {");
- for (unsigned i = 0; i < ARRAY_SIZE(key->zsbuf_cpp); i++)
- printf("%u,", key->zsbuf_cpp[i]);
- printf("}},\n");
+ printf("{ .minx=%u, .miny=%u, .width=%u, .height=%u", key->minx, key->miny,
+ key->width, key->height);
+ printf(", .gmem_page_align=%u, .nr_cbufs=%u", key->gmem_page_align,
+ key->nr_cbufs);
+ printf(", .cbuf_cpp = {");
+ for (unsigned i = 0; i < ARRAY_SIZE(key->cbuf_cpp); i++)
+ printf("%u,", key->cbuf_cpp[i]);
+ printf("}, .zsbuf_cpp = {");
+ for (unsigned i = 0; i < ARRAY_SIZE(key->zsbuf_cpp); i++)
+ printf("%u,", key->zsbuf_cpp[i]);
+ printf("}},\n");
}
static void
dump_gmem_state(const struct fd_gmem_stateobj *gmem)
{
- unsigned total = 0;
- printf("GMEM LAYOUT: bin=%ux%u, nbins=%ux%u\n",
- gmem->bin_w, gmem->bin_h, gmem->nbins_x, gmem->nbins_y);
- for (int i = 0; i < ARRAY_SIZE(gmem->cbuf_base); i++) {
- if (!gmem->cbuf_cpp[i])
- continue;
+ unsigned total = 0;
+ printf("GMEM LAYOUT: bin=%ux%u, nbins=%ux%u\n", gmem->bin_w, gmem->bin_h,
+ gmem->nbins_x, gmem->nbins_y);
+ for (int i = 0; i < ARRAY_SIZE(gmem->cbuf_base); i++) {
+ if (!gmem->cbuf_cpp[i])
+ continue;
- unsigned size = gmem->cbuf_cpp[i] * gmem->bin_w * gmem->bin_h;
- printf(" cbuf[%d]: base=0x%06x, size=0x%x, cpp=%u\n", i,
- gmem->cbuf_base[i], size, gmem->cbuf_cpp[i]);
+ unsigned size = gmem->cbuf_cpp[i] * gmem->bin_w * gmem->bin_h;
+ printf(" cbuf[%d]: base=0x%06x, size=0x%x, cpp=%u\n", i,
+ gmem->cbuf_base[i], size, gmem->cbuf_cpp[i]);
- total = gmem->cbuf_base[i] + size;
- }
+ total = gmem->cbuf_base[i] + size;
+ }
- for (int i = 0; i < ARRAY_SIZE(gmem->zsbuf_base); i++) {
- if (!gmem->zsbuf_cpp[i])
- continue;
+ for (int i = 0; i < ARRAY_SIZE(gmem->zsbuf_base); i++) {
+ if (!gmem->zsbuf_cpp[i])
+ continue;
- unsigned size = gmem->zsbuf_cpp[i] * gmem->bin_w * gmem->bin_h;
- printf(" zsbuf[%d]: base=0x%06x, size=0x%x, cpp=%u\n", i,
- gmem->zsbuf_base[i], size, gmem->zsbuf_cpp[i]);
+ unsigned size = gmem->zsbuf_cpp[i] * gmem->bin_w * gmem->bin_h;
+ printf(" zsbuf[%d]: base=0x%06x, size=0x%x, cpp=%u\n", i,
+ gmem->zsbuf_base[i], size, gmem->zsbuf_cpp[i]);
- total = gmem->zsbuf_base[i] + size;
- }
+ total = gmem->zsbuf_base[i] + size;
+ }
- printf("total: 0x%06x (of 0x%06x)\n", total,
- gmem->screen->gmemsize_bytes);
+ printf("total: 0x%06x (of 0x%06x)\n", total, gmem->screen->gmemsize_bytes);
}
static unsigned
div_align(unsigned num, unsigned denom, unsigned al)
{
- return util_align_npot(DIV_ROUND_UP(num, denom), al);
+ return util_align_npot(DIV_ROUND_UP(num, denom), al);
}
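
To make the helper concrete, a worked example with illustrative values:
DIV_ROUND_UP() rounds the division up, then util_align_npot() rounds the
result up to the next multiple of the (possibly non-power-of-two) alignment:

   div_align(1000, 3, 32); /* DIV_ROUND_UP(1000, 3) = 334, aligned up: 352 */
   div_align(1920, 3, 32); /* 640 is already a multiple of 32, stays 640 */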
static bool
layout_gmem(struct gmem_key *key, uint32_t nbins_x, uint32_t nbins_y,
- struct fd_gmem_stateobj *gmem)
+ struct fd_gmem_stateobj *gmem)
{
- struct fd_screen *screen = gmem->screen;
- uint32_t gmem_align = key->gmem_page_align * 0x1000;
- uint32_t total = 0, i;
-
- if ((nbins_x == 0) || (nbins_y == 0))
- return false;
-
- uint32_t bin_w, bin_h;
- bin_w = div_align(key->width, nbins_x, screen->info.tile_align_w);
- bin_h = div_align(key->height, nbins_y, screen->info.tile_align_h);
-
- if (bin_w > screen->info.tile_max_w)
- return false;
-
- if (bin_h > screen->info.tile_max_h)
- return false;
-
- gmem->bin_w = bin_w;
- gmem->bin_h = bin_h;
-
- /* due to aligning bin_w/h, we could end up with one too
- * many bins in either dimension, so recalculate:
- */
- gmem->nbins_x = DIV_ROUND_UP(key->width, bin_w);
- gmem->nbins_y = DIV_ROUND_UP(key->height, bin_h);
-
- for (i = 0; i < MAX_RENDER_TARGETS; i++) {
- if (key->cbuf_cpp[i]) {
- gmem->cbuf_base[i] = util_align_npot(total, gmem_align);
- total = gmem->cbuf_base[i] + key->cbuf_cpp[i] * bin_w * bin_h;
- }
- }
-
- if (key->zsbuf_cpp[0]) {
- gmem->zsbuf_base[0] = util_align_npot(total, gmem_align);
- total = gmem->zsbuf_base[0] + key->zsbuf_cpp[0] * bin_w * bin_h;
- }
-
- if (key->zsbuf_cpp[1]) {
- gmem->zsbuf_base[1] = util_align_npot(total, gmem_align);
- total = gmem->zsbuf_base[1] + key->zsbuf_cpp[1] * bin_w * bin_h;
- }
-
- return total <= screen->gmemsize_bytes;
+ struct fd_screen *screen = gmem->screen;
+ uint32_t gmem_align = key->gmem_page_align * 0x1000;
+ uint32_t total = 0, i;
+
+ if ((nbins_x == 0) || (nbins_y == 0))
+ return false;
+
+ uint32_t bin_w, bin_h;
+ bin_w = div_align(key->width, nbins_x, screen->info.tile_align_w);
+ bin_h = div_align(key->height, nbins_y, screen->info.tile_align_h);
+
+ if (bin_w > screen->info.tile_max_w)
+ return false;
+
+ if (bin_h > screen->info.tile_max_h)
+ return false;
+
+ gmem->bin_w = bin_w;
+ gmem->bin_h = bin_h;
+
+ /* due to aligning bin_w/h, we could end up with one too
+ * many bins in either dimension, so recalculate:
+ */
+ gmem->nbins_x = DIV_ROUND_UP(key->width, bin_w);
+ gmem->nbins_y = DIV_ROUND_UP(key->height, bin_h);
+
+ for (i = 0; i < MAX_RENDER_TARGETS; i++) {
+ if (key->cbuf_cpp[i]) {
+ gmem->cbuf_base[i] = util_align_npot(total, gmem_align);
+ total = gmem->cbuf_base[i] + key->cbuf_cpp[i] * bin_w * bin_h;
+ }
+ }
+
+ if (key->zsbuf_cpp[0]) {
+ gmem->zsbuf_base[0] = util_align_npot(total, gmem_align);
+ total = gmem->zsbuf_base[0] + key->zsbuf_cpp[0] * bin_w * bin_h;
+ }
+
+ if (key->zsbuf_cpp[1]) {
+ gmem->zsbuf_base[1] = util_align_npot(total, gmem_align);
+ total = gmem->zsbuf_base[1] + key->zsbuf_cpp[1] * bin_w * bin_h;
+ }
+
+ return total <= screen->gmemsize_bytes;
}
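
A worked pass through the sizing above, with illustrative numbers (assuming
gmem_page_align == 1, i.e. 4 KiB alignment, and a 1 MiB GMEM): one RGBA8
color buffer (cpp = 4) plus a Z24S8 depth buffer (cpp = 4) at 512x256 bins
lays out as:

   cbuf_base[0]  = 0x00000; /* size = 4 * 512 * 256 = 0x80000 */
   zsbuf_base[0] = 0x80000; /* size = 4 * 512 * 256 = 0x80000 */
   /* total = 0x100000 <= gmemsize_bytes, so this bin size fits */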
static void
calc_nbins(struct gmem_key *key, struct fd_gmem_stateobj *gmem)
{
- struct fd_screen *screen = gmem->screen;
- uint32_t nbins_x = 1, nbins_y = 1;
- uint32_t max_width = screen->info.tile_max_w;
- uint32_t max_height = screen->info.tile_max_h;
-
- if (FD_DBG(MSGS)) {
- debug_printf("binning input: cbuf cpp:");
- for (unsigned i = 0; i < key->nr_cbufs; i++)
- debug_printf(" %d", key->cbuf_cpp[i]);
- debug_printf(", zsbuf cpp: %d; %dx%d\n",
- key->zsbuf_cpp[0], key->width, key->height);
- }
-
- /* first, find a bin size that satisfies the maximum width/
- * height restrictions:
- */
- while (div_align(key->width, nbins_x, screen->info.tile_align_w) > max_width) {
- nbins_x++;
- }
-
- while (div_align(key->height, nbins_y, screen->info.tile_align_h) > max_height) {
- nbins_y++;
- }
-
- /* then find a bin width/height that satisfies the memory
- * constraints:
- */
- while (!layout_gmem(key, nbins_x, nbins_y, gmem)) {
- if (nbins_y > nbins_x) {
- nbins_x++;
- } else {
- nbins_y++;
- }
- }
-
- /* Lets see if we can tweak the layout a bit and come up with
- * something better:
- */
- if ((((nbins_x - 1) * (nbins_y + 1)) < (nbins_x * nbins_y)) &&
- layout_gmem(key, nbins_x - 1, nbins_y + 1, gmem)) {
- nbins_x--;
- nbins_y++;
- } else if ((((nbins_x + 1) * (nbins_y - 1)) < (nbins_x * nbins_y)) &&
- layout_gmem(key, nbins_x + 1, nbins_y - 1, gmem)) {
- nbins_x++;
- nbins_y--;
- }
-
- layout_gmem(key, nbins_x, nbins_y, gmem);
+ struct fd_screen *screen = gmem->screen;
+ uint32_t nbins_x = 1, nbins_y = 1;
+ uint32_t max_width = screen->info.tile_max_w;
+ uint32_t max_height = screen->info.tile_max_h;
+
+ if (FD_DBG(MSGS)) {
+ debug_printf("binning input: cbuf cpp:");
+ for (unsigned i = 0; i < key->nr_cbufs; i++)
+ debug_printf(" %d", key->cbuf_cpp[i]);
+ debug_printf(", zsbuf cpp: %d; %dx%d\n", key->zsbuf_cpp[0], key->width,
+ key->height);
+ }
+
+ /* first, find a bin size that satisfies the maximum width/
+ * height restrictions:
+ */
+ while (div_align(key->width, nbins_x, screen->info.tile_align_w) >
+ max_width) {
+ nbins_x++;
+ }
+
+ while (div_align(key->height, nbins_y, screen->info.tile_align_h) >
+ max_height) {
+ nbins_y++;
+ }
+
+ /* then find a bin width/height that satisfies the memory
+ * constraints:
+ */
+ while (!layout_gmem(key, nbins_x, nbins_y, gmem)) {
+ if (nbins_y > nbins_x) {
+ nbins_x++;
+ } else {
+ nbins_y++;
+ }
+ }
+
+ /* Let's see if we can tweak the layout a bit and come up with
+ * something better:
+ */
+ if ((((nbins_x - 1) * (nbins_y + 1)) < (nbins_x * nbins_y)) &&
+ layout_gmem(key, nbins_x - 1, nbins_y + 1, gmem)) {
+ nbins_x--;
+ nbins_y++;
+ } else if ((((nbins_x + 1) * (nbins_y - 1)) < (nbins_x * nbins_y)) &&
+ layout_gmem(key, nbins_x + 1, nbins_y - 1, gmem)) {
+ nbins_x++;
+ nbins_y--;
+ }
+
+ layout_gmem(key, nbins_x, nbins_y, gmem);
}
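
The final "tweak" step trades a lopsided grid for a squarer one whenever
that lowers the total bin count. For example, with nbins_x = 2 and
nbins_y = 5 (10 bins), (2 - 1) * (5 + 1) = 6 < 10, so if layout_gmem()
also succeeds for the 1x6 grid the function settles on 6 larger bins
instead.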
static struct fd_gmem_stateobj *
gmem_stateobj_init(struct fd_screen *screen, struct gmem_key *key)
{
- struct fd_gmem_stateobj *gmem =
- rzalloc(screen->gmem_cache.ht, struct fd_gmem_stateobj);
- pipe_reference_init(&gmem->reference, 1);
- gmem->screen = screen;
- gmem->key = key;
- list_inithead(&gmem->node);
-
- const unsigned npipes = screen->info.num_vsc_pipes;
- uint32_t i, j, t, xoff, yoff;
- uint32_t tpp_x, tpp_y;
- int tile_n[npipes];
-
- calc_nbins(key, gmem);
-
- DBG("using %d bins of size %dx%d", gmem->nbins_x * gmem->nbins_y,
- gmem->bin_w, gmem->bin_h);
-
- memcpy(gmem->cbuf_cpp, key->cbuf_cpp, sizeof(key->cbuf_cpp));
- memcpy(gmem->zsbuf_cpp, key->zsbuf_cpp, sizeof(key->zsbuf_cpp));
- gmem->minx = key->minx;
- gmem->miny = key->miny;
- gmem->width = key->width;
- gmem->height = key->height;
-
- if (BIN_DEBUG) {
- dump_gmem_state(gmem);
- dump_gmem_key(key);
- }
-
- /*
- * Assign tiles and pipes:
- *
- * At some point it might be worth playing with different
- * strategies and seeing if that makes much impact on
- * performance.
- */
-
-#define div_round_up(v, a) (((v) + (a) - 1) / (a))
- /* figure out number of tiles per pipe: */
- if (is_a20x(screen)) {
- /* for a20x we want to minimize the number of "pipes"
- * binning data has 3 bits for x/y (8x8) but the edges are used to
- * cull off-screen vertices with hw binning, so we have 6x6 pipes
- */
- tpp_x = 6;
- tpp_y = 6;
- } else {
- tpp_x = tpp_y = 1;
- while (div_round_up(gmem->nbins_y, tpp_y) > npipes)
- tpp_y += 2;
- while ((div_round_up(gmem->nbins_y, tpp_y) *
- div_round_up(gmem->nbins_x, tpp_x)) > npipes)
- tpp_x += 1;
- }
+ struct fd_gmem_stateobj *gmem =
+ rzalloc(screen->gmem_cache.ht, struct fd_gmem_stateobj);
+ pipe_reference_init(&gmem->reference, 1);
+ gmem->screen = screen;
+ gmem->key = key;
+ list_inithead(&gmem->node);
+
+ const unsigned npipes = screen->info.num_vsc_pipes;
+ uint32_t i, j, t, xoff, yoff;
+ uint32_t tpp_x, tpp_y;
+ int tile_n[npipes];
+
+ calc_nbins(key, gmem);
+
+ DBG("using %d bins of size %dx%d", gmem->nbins_x * gmem->nbins_y,
+ gmem->bin_w, gmem->bin_h);
+
+ memcpy(gmem->cbuf_cpp, key->cbuf_cpp, sizeof(key->cbuf_cpp));
+ memcpy(gmem->zsbuf_cpp, key->zsbuf_cpp, sizeof(key->zsbuf_cpp));
+ gmem->minx = key->minx;
+ gmem->miny = key->miny;
+ gmem->width = key->width;
+ gmem->height = key->height;
+
+ if (BIN_DEBUG) {
+ dump_gmem_state(gmem);
+ dump_gmem_key(key);
+ }
+
+ /*
+ * Assign tiles and pipes:
+ *
+ * At some point it might be worth playing with different
+ * strategies and seeing if that makes much impact on
+ * performance.
+ */
+
+#define div_round_up(v, a) (((v) + (a)-1) / (a))
+ /* figure out number of tiles per pipe: */
+ if (is_a20x(screen)) {
+ /* for a20x we want to minimize the number of "pipes":
+ * binning data has 3 bits for x/y (8x8), but the edges are used to
+ * cull off-screen vertices with hw binning, so we have 6x6 pipes
+ */
+ tpp_x = 6;
+ tpp_y = 6;
+ } else {
+ tpp_x = tpp_y = 1;
+ while (div_round_up(gmem->nbins_y, tpp_y) > npipes)
+ tpp_y += 2;
+ while ((div_round_up(gmem->nbins_y, tpp_y) *
+ div_round_up(gmem->nbins_x, tpp_x)) > npipes)
+ tpp_x += 1;
+ }
#ifdef DEBUG
- tpp_x = env_var_as_unsigned("TPP_X", tpp_x);
- tpp_y = env_var_as_unsigned("TPP_Y", tpp_x);
+ tpp_x = env_var_as_unsigned("TPP_X", tpp_x);
+ tpp_y = env_var_as_unsigned("TPP_Y", tpp_x);
#endif
- gmem->maxpw = tpp_x;
- gmem->maxph = tpp_y;
-
- /* configure pipes: */
- xoff = yoff = 0;
- for (i = 0; i < npipes; i++) {
- struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
-
- if (xoff >= gmem->nbins_x) {
- xoff = 0;
- yoff += tpp_y;
- }
-
- if (yoff >= gmem->nbins_y) {
- break;
- }
-
- pipe->x = xoff;
- pipe->y = yoff;
- pipe->w = MIN2(tpp_x, gmem->nbins_x - xoff);
- pipe->h = MIN2(tpp_y, gmem->nbins_y - yoff);
-
- xoff += tpp_x;
- }
-
- /* number of pipes to use for a20x */
- gmem->num_vsc_pipes = MAX2(1, i);
-
- for (; i < npipes; i++) {
- struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
- pipe->x = pipe->y = pipe->w = pipe->h = 0;
- }
-
- if (BIN_DEBUG) {
- printf("%dx%d ... tpp=%dx%d\n", gmem->nbins_x, gmem->nbins_y, tpp_x, tpp_y);
- for (i = 0; i < ARRAY_SIZE(gmem->vsc_pipe); i++) {
- struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
- printf("pipe[%d]: %ux%u @ %u,%u\n", i,
- pipe->w, pipe->h, pipe->x, pipe->y);
- }
- }
-
- /* configure tiles: */
- t = 0;
- yoff = key->miny;
- memset(tile_n, 0, sizeof(tile_n));
- for (i = 0; i < gmem->nbins_y; i++) {
- int bw, bh;
-
- xoff = key->minx;
-
- /* clip bin height: */
- bh = MIN2(gmem->bin_h, key->miny + key->height - yoff);
- assert(bh > 0);
-
- for (j = 0; j < gmem->nbins_x; j++) {
- struct fd_tile *tile = &gmem->tile[t];
- uint32_t p;
-
- assert(t < ARRAY_SIZE(gmem->tile));
-
- /* pipe number: */
- p = ((i / tpp_y) * div_round_up(gmem->nbins_x, tpp_x)) + (j / tpp_x);
- assert(p < gmem->num_vsc_pipes);
-
- /* clip bin width: */
- bw = MIN2(gmem->bin_w, key->minx + key->width - xoff);
- assert(bw > 0);
-
- tile->n = !is_a20x(screen) ? tile_n[p]++ :
- ((i % tpp_y + 1) << 3 | (j % tpp_x + 1));
- tile->p = p;
- tile->bin_w = bw;
- tile->bin_h = bh;
- tile->xoff = xoff;
- tile->yoff = yoff;
-
- if (BIN_DEBUG) {
- printf("tile[%d]: p=%u, bin=%ux%u+%u+%u\n", t,
- p, bw, bh, xoff, yoff);
- }
-
- t++;
-
- xoff += bw;
- }
-
- yoff += bh;
- }
-
- if (BIN_DEBUG) {
- t = 0;
- for (i = 0; i < gmem->nbins_y; i++) {
- for (j = 0; j < gmem->nbins_x; j++) {
- struct fd_tile *tile = &gmem->tile[t++];
- printf("|p:%u n:%u|", tile->p, tile->n);
- }
- printf("\n");
- }
- }
-
- return gmem;
+ gmem->maxpw = tpp_x;
+ gmem->maxph = tpp_y;
+
+ /* configure pipes: */
+ xoff = yoff = 0;
+ for (i = 0; i < npipes; i++) {
+ struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
+
+ if (xoff >= gmem->nbins_x) {
+ xoff = 0;
+ yoff += tpp_y;
+ }
+
+ if (yoff >= gmem->nbins_y) {
+ break;
+ }
+
+ pipe->x = xoff;
+ pipe->y = yoff;
+ pipe->w = MIN2(tpp_x, gmem->nbins_x - xoff);
+ pipe->h = MIN2(tpp_y, gmem->nbins_y - yoff);
+
+ xoff += tpp_x;
+ }
+
+ /* number of pipes to use for a20x */
+ gmem->num_vsc_pipes = MAX2(1, i);
+
+ for (; i < npipes; i++) {
+ struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
+ pipe->x = pipe->y = pipe->w = pipe->h = 0;
+ }
+
+ if (BIN_DEBUG) {
+ printf("%dx%d ... tpp=%dx%d\n", gmem->nbins_x, gmem->nbins_y, tpp_x,
+ tpp_y);
+ for (i = 0; i < ARRAY_SIZE(gmem->vsc_pipe); i++) {
+ struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
+ printf("pipe[%d]: %ux%u @ %u,%u\n", i, pipe->w, pipe->h, pipe->x,
+ pipe->y);
+ }
+ }
+
+ /* configure tiles: */
+ t = 0;
+ yoff = key->miny;
+ memset(tile_n, 0, sizeof(tile_n));
+ for (i = 0; i < gmem->nbins_y; i++) {
+ int bw, bh;
+
+ xoff = key->minx;
+
+ /* clip bin height: */
+ bh = MIN2(gmem->bin_h, key->miny + key->height - yoff);
+ assert(bh > 0);
+
+ for (j = 0; j < gmem->nbins_x; j++) {
+ struct fd_tile *tile = &gmem->tile[t];
+ uint32_t p;
+
+ assert(t < ARRAY_SIZE(gmem->tile));
+
+ /* pipe number: */
+ p = ((i / tpp_y) * div_round_up(gmem->nbins_x, tpp_x)) + (j / tpp_x);
+ assert(p < gmem->num_vsc_pipes);
+
+ /* clip bin width: */
+ bw = MIN2(gmem->bin_w, key->minx + key->width - xoff);
+ assert(bw > 0);
+
+ tile->n = !is_a20x(screen) ? tile_n[p]++
+ : ((i % tpp_y + 1) << 3 | (j % tpp_x + 1));
+ tile->p = p;
+ tile->bin_w = bw;
+ tile->bin_h = bh;
+ tile->xoff = xoff;
+ tile->yoff = yoff;
+
+ if (BIN_DEBUG) {
+ printf("tile[%d]: p=%u, bin=%ux%u+%u+%u\n", t, p, bw, bh, xoff,
+ yoff);
+ }
+
+ t++;
+
+ xoff += bw;
+ }
+
+ yoff += bh;
+ }
+
+ if (BIN_DEBUG) {
+ t = 0;
+ for (i = 0; i < gmem->nbins_y; i++) {
+ for (j = 0; j < gmem->nbins_x; j++) {
+ struct fd_tile *tile = &gmem->tile[t++];
+ printf("|p:%u n:%u|", tile->p, tile->n);
+ }
+ printf("\n");
+ }
+ }
+
+ return gmem;
}
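
For the a20x tile->n encoding above, a decoded example (illustrative
coordinates): with tpp = 6x6, the bin at (j = 2, i = 4) within its pipe gets

   tile->n = ((4 % 6) + 1) << 3 | ((2 % 6) + 1); /* = 5 << 3 | 3 = 0x2b */

i.e. the 1-based y slot in bits [5:3] and the 1-based x slot in bits [2:0],
consistent with the earlier comment that the 8x8 edge slots are reserved
for off-screen culling.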
void
__fd_gmem_destroy(struct fd_gmem_stateobj *gmem)
{
- struct fd_gmem_cache *cache = &gmem->screen->gmem_cache;
+ struct fd_gmem_cache *cache = &gmem->screen->gmem_cache;
- fd_screen_assert_locked(gmem->screen);
+ fd_screen_assert_locked(gmem->screen);
- _mesa_hash_table_remove_key(cache->ht, gmem->key);
- list_del(&gmem->node);
+ _mesa_hash_table_remove_key(cache->ht, gmem->key);
+ list_del(&gmem->node);
- ralloc_free(gmem->key);
- ralloc_free(gmem);
+ ralloc_free(gmem->key);
+ ralloc_free(gmem);
}
static struct gmem_key *
gmem_key_init(struct fd_batch *batch, bool assume_zs, bool no_scis_opt)
{
- struct fd_screen *screen = batch->ctx->screen;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- bool has_zs = pfb->zsbuf && !!(batch->gmem_reason & (FD_GMEM_DEPTH_ENABLED |
- FD_GMEM_STENCIL_ENABLED | FD_GMEM_CLEARS_DEPTH_STENCIL));
- struct gmem_key *key = rzalloc(screen->gmem_cache.ht, struct gmem_key);
-
- if (has_zs || assume_zs) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
- key->zsbuf_cpp[0] = rsc->layout.cpp;
- if (rsc->stencil)
- key->zsbuf_cpp[1] = rsc->stencil->layout.cpp;
- } else {
- /* we might have a zsbuf, but it isn't used */
- batch->restore &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
- batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
- }
-
- key->nr_cbufs = pfb->nr_cbufs;
- for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
- if (pfb->cbufs[i])
- key->cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format);
- else
- key->cbuf_cpp[i] = 4;
- /* if MSAA, color buffers are super-sampled in GMEM: */
- key->cbuf_cpp[i] *= pfb->samples;
- }
-
- /* NOTE: on a6xx, the max-scissor-rect is handled in fd6_gmem, and
- * we just rely on CP_COND_EXEC to skip bins with no geometry.
- */
- if (no_scis_opt || is_a6xx(screen)) {
- key->minx = 0;
- key->miny = 0;
- key->width = pfb->width;
- key->height = pfb->height;
- } else {
- struct pipe_scissor_state *scissor = &batch->max_scissor;
-
- if (FD_DBG(NOSCIS)) {
- scissor->minx = 0;
- scissor->miny = 0;
- scissor->maxx = pfb->width;
- scissor->maxy = pfb->height;
- }
-
- /* round down to multiple of alignment: */
- key->minx = scissor->minx & ~(screen->info.gmem_align_w - 1);
- key->miny = scissor->miny & ~(screen->info.gmem_align_h - 1);
- key->width = scissor->maxx - key->minx;
- key->height = scissor->maxy - key->miny;
- }
-
- if (is_a20x(screen) && batch->cleared) {
- /* under normal circumstances the requirement would be 4K
- * but the fast clear path requires an alignment of 32K
- */
- key->gmem_page_align = 8;
- } else if (is_a6xx(screen)) {
- key->gmem_page_align = is_a650(screen) ? 3 : 1;
- } else {
- // TODO re-check this across gens.. maybe it should only
- // be a single page in some cases:
- key->gmem_page_align = 4;
- }
-
- return key;
+ struct fd_screen *screen = batch->ctx->screen;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ bool has_zs =
+ pfb->zsbuf &&
+ !!(batch->gmem_reason & (FD_GMEM_DEPTH_ENABLED | FD_GMEM_STENCIL_ENABLED |
+ FD_GMEM_CLEARS_DEPTH_STENCIL));
+ struct gmem_key *key = rzalloc(screen->gmem_cache.ht, struct gmem_key);
+
+ if (has_zs || assume_zs) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ key->zsbuf_cpp[0] = rsc->layout.cpp;
+ if (rsc->stencil)
+ key->zsbuf_cpp[1] = rsc->stencil->layout.cpp;
+ } else {
+ /* we might have a zsbuf, but it isn't used */
+ batch->restore &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
+ batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
+ }
+
+ key->nr_cbufs = pfb->nr_cbufs;
+ for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
+ if (pfb->cbufs[i])
+ key->cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format);
+ else
+ key->cbuf_cpp[i] = 4;
+ /* if MSAA, color buffers are super-sampled in GMEM: */
+ key->cbuf_cpp[i] *= pfb->samples;
+ }
+
+ /* NOTE: on a6xx, the max-scissor-rect is handled in fd6_gmem, and
+ * we just rely on CP_COND_EXEC to skip bins with no geometry.
+ */
+ if (no_scis_opt || is_a6xx(screen)) {
+ key->minx = 0;
+ key->miny = 0;
+ key->width = pfb->width;
+ key->height = pfb->height;
+ } else {
+ struct pipe_scissor_state *scissor = &batch->max_scissor;
+
+ if (FD_DBG(NOSCIS)) {
+ scissor->minx = 0;
+ scissor->miny = 0;
+ scissor->maxx = pfb->width;
+ scissor->maxy = pfb->height;
+ }
+
+ /* round down to multiple of alignment: */
+ key->minx = scissor->minx & ~(screen->info.gmem_align_w - 1);
+ key->miny = scissor->miny & ~(screen->info.gmem_align_h - 1);
+ key->width = scissor->maxx - key->minx;
+ key->height = scissor->maxy - key->miny;
+ }
+
+ if (is_a20x(screen) && batch->cleared) {
+ /* under normal circumstances the requirement would be 4K
+ * but the fast clear path requires an alignment of 32K
+ */
+ key->gmem_page_align = 8;
+ } else if (is_a6xx(screen)) {
+ key->gmem_page_align = is_a650(screen) ? 3 : 1;
+ } else {
+ // TODO re-check this across gens.. maybe it should only
+ // be a single page in some cases:
+ key->gmem_page_align = 4;
+ }
+
+ return key;
}
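
Since gmem_page_align is stored in 4 KiB pages (layout_gmem() multiplies it
by 0x1000), the values chosen above work out to: 8 = 32 KiB for the a20x
fast-clear path (matching the comment), 3 = 12 KiB on a650, 1 = 4 KiB on
other a6xx, and 4 = 16 KiB on older gens.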
static struct fd_gmem_stateobj *
lookup_gmem_state(struct fd_batch *batch, bool assume_zs, bool no_scis_opt)
{
- struct fd_screen *screen = batch->ctx->screen;
- struct fd_gmem_cache *cache = &screen->gmem_cache;
- struct fd_gmem_stateobj *gmem = NULL;
-
- /* Lock before allocating gmem_key, since that a screen-wide
- * ralloc pool and ralloc itself is not thread-safe.
- */
- fd_screen_lock(screen);
-
- struct gmem_key *key = gmem_key_init(batch, assume_zs, no_scis_opt);
- uint32_t hash = gmem_key_hash(key);
-
- struct hash_entry *entry =
- _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);
- if (entry) {
- ralloc_free(key);
- goto found;
- }
-
- /* limit the # of cached gmem states, discarding the least
- * recently used state if needed:
- */
- if (cache->ht->entries >= 20) {
- struct fd_gmem_stateobj *last =
- list_last_entry(&cache->lru, struct fd_gmem_stateobj, node);
- fd_gmem_reference(&last, NULL);
- }
-
- entry = _mesa_hash_table_insert_pre_hashed(cache->ht,
- hash, key, gmem_stateobj_init(screen, key));
+ struct fd_screen *screen = batch->ctx->screen;
+ struct fd_gmem_cache *cache = &screen->gmem_cache;
+ struct fd_gmem_stateobj *gmem = NULL;
+
+ /* Lock before allocating gmem_key, since that is a screen-wide
+ * ralloc pool, and ralloc itself is not thread-safe.
+ */
+ fd_screen_lock(screen);
+
+ struct gmem_key *key = gmem_key_init(batch, assume_zs, no_scis_opt);
+ uint32_t hash = gmem_key_hash(key);
+
+ struct hash_entry *entry =
+ _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);
+ if (entry) {
+ ralloc_free(key);
+ goto found;
+ }
+
+ /* limit the # of cached gmem states, discarding the least
+ * recently used state if needed:
+ */
+ if (cache->ht->entries >= 20) {
+ struct fd_gmem_stateobj *last =
+ list_last_entry(&cache->lru, struct fd_gmem_stateobj, node);
+ fd_gmem_reference(&last, NULL);
+ }
+
+ entry = _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key,
+ gmem_stateobj_init(screen, key));
found:
- fd_gmem_reference(&gmem, entry->data);
- /* Move to the head of the LRU: */
- list_delinit(&gmem->node);
- list_add(&gmem->node, &cache->lru);
+ fd_gmem_reference(&gmem, entry->data);
+ /* Move to the head of the LRU: */
+ list_delinit(&gmem->node);
+ list_add(&gmem->node, &cache->lru);
- fd_screen_unlock(screen);
+ fd_screen_unlock(screen);
- return gmem;
+ return gmem;
}
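
In short, this is a 20-entry LRU cache keyed on the framebuffer-derived
gmem_key: a hit frees the freshly built key and reuses the cached state, a
miss may first drop the least-recently-used entry from the tail of
cache->lru, and every lookup promotes its result to the head, so recurring
framebuffer configurations keep their layouts warm.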
/*
*/
static void
-render_tiles(struct fd_batch *batch, struct fd_gmem_stateobj *gmem)
- assert_dt
+render_tiles(struct fd_batch *batch, struct fd_gmem_stateobj *gmem) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- int i;
+ struct fd_context *ctx = batch->ctx;
+ int i;
- simple_mtx_lock(&ctx->gmem_lock);
+ simple_mtx_lock(&ctx->gmem_lock);
- ctx->emit_tile_init(batch);
+ ctx->emit_tile_init(batch);
- if (batch->restore)
- ctx->stats.batch_restore++;
+ if (batch->restore)
+ ctx->stats.batch_restore++;
- for (i = 0; i < (gmem->nbins_x * gmem->nbins_y); i++) {
- struct fd_tile *tile = &gmem->tile[i];
+ for (i = 0; i < (gmem->nbins_x * gmem->nbins_y); i++) {
+ struct fd_tile *tile = &gmem->tile[i];
- trace_start_tile(&batch->trace, tile->bin_h,
- tile->yoff, tile->bin_w, tile->xoff);
+ trace_start_tile(&batch->trace, tile->bin_h, tile->yoff, tile->bin_w,
+ tile->xoff);
- ctx->emit_tile_prep(batch, tile);
+ ctx->emit_tile_prep(batch, tile);
- if (batch->restore) {
- ctx->emit_tile_mem2gmem(batch, tile);
- }
+ if (batch->restore) {
+ ctx->emit_tile_mem2gmem(batch, tile);
+ }
- ctx->emit_tile_renderprep(batch, tile);
+ ctx->emit_tile_renderprep(batch, tile);
- if (ctx->query_prepare_tile)
- ctx->query_prepare_tile(batch, i, batch->gmem);
+ if (ctx->query_prepare_tile)
+ ctx->query_prepare_tile(batch, i, batch->gmem);
- /* emit IB to drawcmds: */
- trace_start_draw_ib(&batch->trace);
- if (ctx->emit_tile) {
- ctx->emit_tile(batch, tile);
- } else {
- ctx->screen->emit_ib(batch->gmem, batch->draw);
- }
- trace_end_draw_ib(&batch->trace);
- fd_reset_wfi(batch);
+ /* emit IB to drawcmds: */
+ trace_start_draw_ib(&batch->trace);
+ if (ctx->emit_tile) {
+ ctx->emit_tile(batch, tile);
+ } else {
+ ctx->screen->emit_ib(batch->gmem, batch->draw);
+ }
+ trace_end_draw_ib(&batch->trace);
+ fd_reset_wfi(batch);
- /* emit gmem2mem to transfer tile back to system memory: */
- ctx->emit_tile_gmem2mem(batch, tile);
- }
+ /* emit gmem2mem to transfer tile back to system memory: */
+ ctx->emit_tile_gmem2mem(batch, tile);
+ }
- if (ctx->emit_tile_fini)
- ctx->emit_tile_fini(batch);
+ if (ctx->emit_tile_fini)
+ ctx->emit_tile_fini(batch);
- simple_mtx_unlock(&ctx->gmem_lock);
+ simple_mtx_unlock(&ctx->gmem_lock);
}
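
The loop above is the per-bin tiled-rendering sequence: prep the tile,
restore prior contents from system memory (mem2gmem) only when the batch
needs them, point the hardware at the tile, replay the recorded draw
commands through an IB, then resolve the tile back out (gmem2mem); the
sysmem path below skips all of that and draws directly to memory.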
static void
-render_sysmem(struct fd_batch *batch)
- assert_dt
+render_sysmem(struct fd_batch *batch) assert_dt
{
- struct fd_context *ctx = batch->ctx;
+ struct fd_context *ctx = batch->ctx;
- ctx->emit_sysmem_prep(batch);
+ ctx->emit_sysmem_prep(batch);
- if (ctx->query_prepare_tile)
- ctx->query_prepare_tile(batch, 0, batch->gmem);
+ if (ctx->query_prepare_tile)
+ ctx->query_prepare_tile(batch, 0, batch->gmem);
- if (!batch->nondraw) {
- trace_start_draw_ib(&batch->trace);
- }
- /* emit IB to drawcmds: */
- ctx->screen->emit_ib(batch->gmem, batch->draw);
+ if (!batch->nondraw) {
+ trace_start_draw_ib(&batch->trace);
+ }
+ /* emit IB to drawcmds: */
+ ctx->screen->emit_ib(batch->gmem, batch->draw);
- if (!batch->nondraw) {
- trace_end_draw_ib(&batch->trace);
- }
+ if (!batch->nondraw) {
+ trace_end_draw_ib(&batch->trace);
+ }
- fd_reset_wfi(batch);
+ fd_reset_wfi(batch);
- if (ctx->emit_sysmem_fini)
- ctx->emit_sysmem_fini(batch);
+ if (ctx->emit_sysmem_fini)
+ ctx->emit_sysmem_fini(batch);
}
static void
flush_ring(struct fd_batch *batch)
{
- uint32_t timestamp = 0;
- int out_fence_fd = -1;
+ uint32_t timestamp = 0;
+ int out_fence_fd = -1;
- if (FD_DBG(NOHW))
- return;
+ if (FD_DBG(NOHW))
+ return;
- fd_submit_flush(batch->submit, batch->in_fence_fd,
- batch->needs_out_fence_fd ? &out_fence_fd : NULL,
- ×tamp);
+ fd_submit_flush(batch->submit, batch->in_fence_fd,
+ batch->needs_out_fence_fd ? &out_fence_fd : NULL,
+ ×tamp);
- fd_fence_populate(batch->fence, timestamp, out_fence_fd);
+ fd_fence_populate(batch->fence, timestamp, out_fence_fd);
}
void
fd_gmem_render_tiles(struct fd_batch *batch)
{
- struct fd_context *ctx = batch->ctx;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- bool sysmem = false;
-
- if (!batch->nondraw) {
- trace_flush_batch(&batch->trace, batch, batch->cleared,
- batch->gmem_reason, batch->num_draws);
- trace_framebuffer_state(&batch->trace, pfb);
- }
-
- if (ctx->emit_sysmem_prep && !batch->nondraw) {
- if (fd_autotune_use_bypass(&ctx->autotune, batch) &&
- !FD_DBG(NOBYPASS)) {
- sysmem = true;
- }
-
- /* For ARB_framebuffer_no_attachments: */
- if ((pfb->nr_cbufs == 0) && !pfb->zsbuf) {
- sysmem = true;
- }
- }
-
- if (FD_DBG(NOGMEM))
- sysmem = true;
-
- /* Layered rendering always needs bypass. */
- for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
- struct pipe_surface *psurf = pfb->cbufs[i];
- if (!psurf)
- continue;
- if (psurf->u.tex.first_layer < psurf->u.tex.last_layer)
- sysmem = true;
- }
-
- /* Tessellation doesn't seem to support tiled rendering so fall back to
- * bypass.
- */
- if (batch->tessellation) {
- debug_assert(ctx->emit_sysmem_prep);
- sysmem = true;
- }
-
- fd_reset_wfi(batch);
-
- ctx->stats.batch_total++;
-
- if (batch->nondraw) {
- DBG("%p: rendering non-draw", batch);
- render_sysmem(batch);
- ctx->stats.batch_nondraw++;
- } else if (sysmem) {
- trace_render_sysmem(&batch->trace);
- if (ctx->query_prepare)
- ctx->query_prepare(batch, 1);
- render_sysmem(batch);
- ctx->stats.batch_sysmem++;
- } else {
- struct fd_gmem_stateobj *gmem = lookup_gmem_state(batch, false, false);
- batch->gmem_state = gmem;
- trace_render_gmem(&batch->trace, gmem->nbins_x, gmem->nbins_y,
- gmem->bin_w, gmem->bin_h);
- if (ctx->query_prepare)
- ctx->query_prepare(batch, gmem->nbins_x * gmem->nbins_y);
- render_tiles(batch, gmem);
- batch->gmem_state = NULL;
-
- fd_screen_lock(ctx->screen);
- fd_gmem_reference(&gmem, NULL);
- fd_screen_unlock(ctx->screen);
-
- ctx->stats.batch_gmem++;
- }
-
- flush_ring(batch);
-
- u_trace_flush(&batch->trace);
+ struct fd_context *ctx = batch->ctx;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ bool sysmem = false;
+
+ if (!batch->nondraw) {
+ trace_flush_batch(&batch->trace, batch, batch->cleared,
+ batch->gmem_reason, batch->num_draws);
+ trace_framebuffer_state(&batch->trace, pfb);
+ }
+
+ if (ctx->emit_sysmem_prep && !batch->nondraw) {
+ if (fd_autotune_use_bypass(&ctx->autotune, batch) && !FD_DBG(NOBYPASS)) {
+ sysmem = true;
+ }
+
+ /* For ARB_framebuffer_no_attachments: */
+ if ((pfb->nr_cbufs == 0) && !pfb->zsbuf) {
+ sysmem = true;
+ }
+ }
+
+ if (FD_DBG(NOGMEM))
+ sysmem = true;
+
+ /* Layered rendering always needs bypass. */
+ for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
+ struct pipe_surface *psurf = pfb->cbufs[i];
+ if (!psurf)
+ continue;
+ if (psurf->u.tex.first_layer < psurf->u.tex.last_layer)
+ sysmem = true;
+ }
+
+ /* Tessellation doesn't seem to support tiled rendering, so fall back to
+ * bypass.
+ */
+ if (batch->tessellation) {
+ debug_assert(ctx->emit_sysmem_prep);
+ sysmem = true;
+ }
+
+ fd_reset_wfi(batch);
+
+ ctx->stats.batch_total++;
+
+ if (batch->nondraw) {
+ DBG("%p: rendering non-draw", batch);
+ render_sysmem(batch);
+ ctx->stats.batch_nondraw++;
+ } else if (sysmem) {
+ trace_render_sysmem(&batch->trace);
+ if (ctx->query_prepare)
+ ctx->query_prepare(batch, 1);
+ render_sysmem(batch);
+ ctx->stats.batch_sysmem++;
+ } else {
+ struct fd_gmem_stateobj *gmem = lookup_gmem_state(batch, false, false);
+ batch->gmem_state = gmem;
+ trace_render_gmem(&batch->trace, gmem->nbins_x, gmem->nbins_y,
+ gmem->bin_w, gmem->bin_h);
+ if (ctx->query_prepare)
+ ctx->query_prepare(batch, gmem->nbins_x * gmem->nbins_y);
+ render_tiles(batch, gmem);
+ batch->gmem_state = NULL;
+
+ fd_screen_lock(ctx->screen);
+ fd_gmem_reference(&gmem, NULL);
+ fd_screen_unlock(ctx->screen);
+
+ ctx->stats.batch_gmem++;
+ }
+
+ flush_ring(batch);
+
+ u_trace_flush(&batch->trace);
}
/* Determine a worst-case estimate (ie. assuming we don't eliminate an
unsigned
fd_gmem_estimate_bins_per_pipe(struct fd_batch *batch)
{
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct fd_screen *screen = batch->ctx->screen;
- struct fd_gmem_stateobj *gmem = lookup_gmem_state(batch, !!pfb->zsbuf, true);
- unsigned nbins = gmem->maxpw * gmem->maxph;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ struct fd_screen *screen = batch->ctx->screen;
+ struct fd_gmem_stateobj *gmem = lookup_gmem_state(batch, !!pfb->zsbuf, true);
+ unsigned nbins = gmem->maxpw * gmem->maxph;
- fd_screen_lock(screen);
- fd_gmem_reference(&gmem, NULL);
- fd_screen_unlock(screen);
+ fd_screen_lock(screen);
+ fd_gmem_reference(&gmem, NULL);
+ fd_screen_unlock(screen);
- return nbins;
+ return nbins;
}
/* When deciding whether a tile needs mem2gmem, we need to take into
*/
bool
fd_gmem_needs_restore(struct fd_batch *batch, const struct fd_tile *tile,
- uint32_t buffers)
+ uint32_t buffers)
{
- if (!(batch->restore & buffers))
- return false;
+ if (!(batch->restore & buffers))
+ return false;
- return true;
+ return true;
}
void
fd_gmem_screen_init(struct pipe_screen *pscreen)
{
- struct fd_gmem_cache *cache = &fd_screen(pscreen)->gmem_cache;
+ struct fd_gmem_cache *cache = &fd_screen(pscreen)->gmem_cache;
- cache->ht = _mesa_hash_table_create(NULL, gmem_key_hash, gmem_key_equals);
- list_inithead(&cache->lru);
+ cache->ht = _mesa_hash_table_create(NULL, gmem_key_hash, gmem_key_equals);
+ list_inithead(&cache->lru);
}
void
fd_gmem_screen_fini(struct pipe_screen *pscreen)
{
- struct fd_gmem_cache *cache = &fd_screen(pscreen)->gmem_cache;
+ struct fd_gmem_cache *cache = &fd_screen(pscreen)->gmem_cache;
- _mesa_hash_table_destroy(cache->ht, NULL);
+ _mesa_hash_table_destroy(cache->ht, NULL);
}
/* per-pipe configuration for hw binning: */
struct fd_vsc_pipe {
- uint8_t x, y, w, h; /* VSC_PIPE[p].CONFIG */
+ uint8_t x, y, w, h; /* VSC_PIPE[p].CONFIG */
};
/* per-tile configuration for hw binning: */
struct fd_tile {
- uint8_t p; /* index into vsc_pipe[]s */
- uint8_t n; /* slot within pipe */
- uint16_t bin_w, bin_h;
- uint16_t xoff, yoff;
+ uint8_t p; /* index into vsc_pipe[]s */
+ uint8_t n; /* slot within pipe */
+ uint16_t bin_w, bin_h;
+ uint16_t xoff, yoff;
};
struct fd_gmem_stateobj {
- struct pipe_reference reference;
- struct fd_screen *screen;
- void *key;
-
- uint32_t cbuf_base[MAX_RENDER_TARGETS];
- uint32_t zsbuf_base[2];
- uint8_t cbuf_cpp[MAX_RENDER_TARGETS];
- uint8_t zsbuf_cpp[2];
- uint16_t bin_h, nbins_y;
- uint16_t bin_w, nbins_x;
- uint16_t minx, miny;
- uint16_t width, height;
- uint16_t maxpw, maxph; /* maximum pipe width/height */
- uint8_t num_vsc_pipes; /* number of pipes for a20x */
-
- struct fd_vsc_pipe vsc_pipe[32];
- struct fd_tile tile[2048];
-
- struct list_head node;
+ struct pipe_reference reference;
+ struct fd_screen *screen;
+ void *key;
+
+ uint32_t cbuf_base[MAX_RENDER_TARGETS];
+ uint32_t zsbuf_base[2];
+ uint8_t cbuf_cpp[MAX_RENDER_TARGETS];
+ uint8_t zsbuf_cpp[2];
+ uint16_t bin_h, nbins_y;
+ uint16_t bin_w, nbins_x;
+ uint16_t minx, miny;
+ uint16_t width, height;
+ uint16_t maxpw, maxph; /* maximum pipe width/height */
+ uint8_t num_vsc_pipes; /* number of pipes for a20x */
+
+ struct fd_vsc_pipe vsc_pipe[32];
+ struct fd_tile tile[2048];
+
+ struct list_head node;
};
void __fd_gmem_destroy(struct fd_gmem_stateobj *gmem);
static inline void
fd_gmem_reference(struct fd_gmem_stateobj **ptr, struct fd_gmem_stateobj *gmem)
{
- struct fd_gmem_stateobj *old_gmem = *ptr;
+ struct fd_gmem_stateobj *old_gmem = *ptr;
- if (pipe_reference(&(*ptr)->reference, &gmem->reference))
- __fd_gmem_destroy(old_gmem);
+ if (pipe_reference(&(*ptr)->reference, &gmem->reference))
+ __fd_gmem_destroy(old_gmem);
- *ptr = gmem;
+ *ptr = gmem;
}
struct fd_gmem_cache {
- struct hash_table *ht;
- struct list_head lru;
+ struct hash_table *ht;
+ struct list_head lru;
};
struct fd_batch;
void fd_gmem_render_tiles(struct fd_batch *batch) assert_dt;
unsigned fd_gmem_estimate_bins_per_pipe(struct fd_batch *batch);
bool fd_gmem_needs_restore(struct fd_batch *batch, const struct fd_tile *tile,
- uint32_t buffers);
+ uint32_t buffers);
struct pipe_screen;
void fd_gmem_screen_init(struct pipe_screen *pscreen);
#include "util/u_simple_shaders.h"
-#include "freedreno_program.h"
#include "freedreno_context.h"
+#include "freedreno_program.h"
static void
-update_bound_stage(struct fd_context *ctx, enum pipe_shader_type shader, bool bound)
- assert_dt
+update_bound_stage(struct fd_context *ctx, enum pipe_shader_type shader,
+ bool bound) assert_dt
{
- if (bound) {
- ctx->bound_shader_stages |= BIT(shader);
- } else {
- ctx->bound_shader_stages &= ~BIT(shader);
- }
+ if (bound) {
+ ctx->bound_shader_stages |= BIT(shader);
+ } else {
+ ctx->bound_shader_stages &= ~BIT(shader);
+ }
}
static void
-fd_vs_state_bind(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_vs_state_bind(struct pipe_context *pctx, void *hwcso) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->prog.vs = hwcso;
- fd_context_dirty_shader(ctx, PIPE_SHADER_VERTEX, FD_DIRTY_SHADER_PROG);
- update_bound_stage(ctx, PIPE_SHADER_VERTEX, !!hwcso);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->prog.vs = hwcso;
+ fd_context_dirty_shader(ctx, PIPE_SHADER_VERTEX, FD_DIRTY_SHADER_PROG);
+ update_bound_stage(ctx, PIPE_SHADER_VERTEX, !!hwcso);
}
static void
-fd_tcs_state_bind(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_tcs_state_bind(struct pipe_context *pctx, void *hwcso) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->prog.hs = hwcso;
- fd_context_dirty_shader(ctx, PIPE_SHADER_TESS_CTRL, FD_DIRTY_SHADER_PROG);
- update_bound_stage(ctx, PIPE_SHADER_TESS_CTRL, !!hwcso);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->prog.hs = hwcso;
+ fd_context_dirty_shader(ctx, PIPE_SHADER_TESS_CTRL, FD_DIRTY_SHADER_PROG);
+ update_bound_stage(ctx, PIPE_SHADER_TESS_CTRL, !!hwcso);
}
static void
-fd_tes_state_bind(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_tes_state_bind(struct pipe_context *pctx, void *hwcso) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->prog.ds = hwcso;
- fd_context_dirty_shader(ctx, PIPE_SHADER_TESS_EVAL, FD_DIRTY_SHADER_PROG);
- update_bound_stage(ctx, PIPE_SHADER_TESS_EVAL, !!hwcso);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->prog.ds = hwcso;
+ fd_context_dirty_shader(ctx, PIPE_SHADER_TESS_EVAL, FD_DIRTY_SHADER_PROG);
+ update_bound_stage(ctx, PIPE_SHADER_TESS_EVAL, !!hwcso);
}
static void
-fd_gs_state_bind(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_gs_state_bind(struct pipe_context *pctx, void *hwcso) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->prog.gs = hwcso;
- fd_context_dirty_shader(ctx, PIPE_SHADER_GEOMETRY, FD_DIRTY_SHADER_PROG);
- update_bound_stage(ctx, PIPE_SHADER_GEOMETRY, !!hwcso);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->prog.gs = hwcso;
+ fd_context_dirty_shader(ctx, PIPE_SHADER_GEOMETRY, FD_DIRTY_SHADER_PROG);
+ update_bound_stage(ctx, PIPE_SHADER_GEOMETRY, !!hwcso);
}
static void
-fd_fs_state_bind(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_fs_state_bind(struct pipe_context *pctx, void *hwcso) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->prog.fs = hwcso;
- fd_context_dirty_shader(ctx, PIPE_SHADER_FRAGMENT, FD_DIRTY_SHADER_PROG);
- update_bound_stage(ctx, PIPE_SHADER_FRAGMENT, !!hwcso);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->prog.fs = hwcso;
+ fd_context_dirty_shader(ctx, PIPE_SHADER_FRAGMENT, FD_DIRTY_SHADER_PROG);
+ update_bound_stage(ctx, PIPE_SHADER_FRAGMENT, !!hwcso);
}
-static const char *solid_fs =
- "FRAG \n"
- "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 \n"
- "DCL CONST[0] \n"
- "DCL OUT[0], COLOR \n"
- " 0: MOV OUT[0], CONST[0] \n"
- " 1: END \n";
-
-static const char *solid_vs =
- "VERT \n"
- "DCL IN[0] \n"
- "DCL OUT[0], POSITION \n"
- " 0: MOV OUT[0], IN[0] \n"
- " 1: END \n";
-
-static void * assemble_tgsi(struct pipe_context *pctx,
- const char *src, bool frag)
+static const char *solid_fs = "FRAG \n"
+ "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 \n"
+ "DCL CONST[0] \n"
+ "DCL OUT[0], COLOR \n"
+ " 0: MOV OUT[0], CONST[0] \n"
+ " 1: END \n";
+
+static const char *solid_vs = "VERT \n"
+ "DCL IN[0] \n"
+ "DCL OUT[0], POSITION \n"
+ " 0: MOV OUT[0], IN[0] \n"
+ " 1: END \n";
+
+static void *
+assemble_tgsi(struct pipe_context *pctx, const char *src, bool frag)
{
- struct tgsi_token toks[32];
- struct pipe_shader_state cso = {
- .tokens = toks,
- };
-
- bool ret = tgsi_text_translate(src, toks, ARRAY_SIZE(toks));
- assume(ret);
-
- if (frag)
- return pctx->create_fs_state(pctx, &cso);
- else
- return pctx->create_vs_state(pctx, &cso);
+ struct tgsi_token toks[32];
+ struct pipe_shader_state cso = {
+ .tokens = toks,
+ };
+
+ bool ret = tgsi_text_translate(src, toks, ARRAY_SIZE(toks));
+ assume(ret);
+
+ if (frag)
+ return pctx->create_fs_state(pctx, &cso);
+ else
+ return pctx->create_vs_state(pctx, &cso);
}
/* the correct semantic to use for the texcoord varying depends on pipe-cap: */
static enum tgsi_semantic
texcoord_semantic(struct pipe_context *pctx)
{
- struct pipe_screen *pscreen = pctx->screen;
+ struct pipe_screen *pscreen = pctx->screen;
- if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_TEXCOORD)) {
- return TGSI_SEMANTIC_TEXCOORD;
- } else {
- return TGSI_SEMANTIC_GENERIC;
- }
+ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_TEXCOORD)) {
+ return TGSI_SEMANTIC_TEXCOORD;
+ } else {
+ return TGSI_SEMANTIC_GENERIC;
+ }
}
static void *
fd_prog_blit_vs(struct pipe_context *pctx)
{
- struct ureg_program *ureg;
+ struct ureg_program *ureg;
- ureg = ureg_create(PIPE_SHADER_VERTEX);
- if (!ureg)
- return NULL;
+ ureg = ureg_create(PIPE_SHADER_VERTEX);
+ if (!ureg)
+ return NULL;
- struct ureg_src in0 = ureg_DECL_vs_input(ureg, 0);
- struct ureg_src in1 = ureg_DECL_vs_input(ureg, 1);
+ struct ureg_src in0 = ureg_DECL_vs_input(ureg, 0);
+ struct ureg_src in1 = ureg_DECL_vs_input(ureg, 1);
- struct ureg_dst out0 = ureg_DECL_output(ureg, texcoord_semantic(pctx), 0);
- struct ureg_dst out1 = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 1);
+ struct ureg_dst out0 = ureg_DECL_output(ureg, texcoord_semantic(pctx), 0);
+ struct ureg_dst out1 = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 1);
- ureg_MOV(ureg, out0, in0);
- ureg_MOV(ureg, out1, in1);
+ ureg_MOV(ureg, out0, in0);
+ ureg_MOV(ureg, out1, in1);
- ureg_END(ureg);
+ ureg_END(ureg);
- return ureg_create_shader_and_destroy(ureg, pctx);
+ return ureg_create_shader_and_destroy(ureg, pctx);
}
static void *
fd_prog_blit_fs(struct pipe_context *pctx, int rts, bool depth)
{
- int i;
- struct ureg_src tc;
- struct ureg_program *ureg;
-
- debug_assert(rts <= MAX_RENDER_TARGETS);
-
- ureg = ureg_create(PIPE_SHADER_FRAGMENT);
- if (!ureg)
- return NULL;
-
- tc = ureg_DECL_fs_input(
- ureg, texcoord_semantic(pctx), 0, TGSI_INTERPOLATE_PERSPECTIVE);
- for (i = 0; i < rts; i++)
- ureg_TEX(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, i),
- TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, i));
- if (depth)
- ureg_TEX(ureg,
- ureg_writemask(
- ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0),
- TGSI_WRITEMASK_Z),
- TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, rts));
-
- ureg_END(ureg);
-
- return ureg_create_shader_and_destroy(ureg, pctx);
+ int i;
+ struct ureg_src tc;
+ struct ureg_program *ureg;
+
+ debug_assert(rts <= MAX_RENDER_TARGETS);
+
+ ureg = ureg_create(PIPE_SHADER_FRAGMENT);
+ if (!ureg)
+ return NULL;
+
+ tc = ureg_DECL_fs_input(ureg, texcoord_semantic(pctx), 0,
+ TGSI_INTERPOLATE_PERSPECTIVE);
+ for (i = 0; i < rts; i++)
+ ureg_TEX(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, i),
+ TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, i));
+ if (depth)
+ ureg_TEX(ureg,
+ ureg_writemask(ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0),
+ TGSI_WRITEMASK_Z),
+ TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, rts));
+
+ ureg_END(ureg);
+
+ return ureg_create_shader_and_destroy(ureg, pctx);
}
-
-void fd_prog_init(struct pipe_context *pctx)
+void
+fd_prog_init(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
- int i;
-
- pctx->bind_vs_state = fd_vs_state_bind;
- pctx->bind_tcs_state = fd_tcs_state_bind;
- pctx->bind_tes_state = fd_tes_state_bind;
- pctx->bind_gs_state = fd_gs_state_bind;
- pctx->bind_fs_state = fd_fs_state_bind;
-
- ctx->solid_prog.fs = assemble_tgsi(pctx, solid_fs, true);
- ctx->solid_prog.vs = assemble_tgsi(pctx, solid_vs, false);
-
- if (ctx->screen->gpu_id >= 600) {
- ctx->solid_layered_prog.fs = assemble_tgsi(pctx, solid_fs, true);
- ctx->solid_layered_prog.vs =
- util_make_layered_clear_vertex_shader(pctx);
- }
-
- if (ctx->screen->gpu_id >= 500)
- return;
-
- ctx->blit_prog[0].vs = fd_prog_blit_vs(pctx);
- ctx->blit_prog[0].fs = fd_prog_blit_fs(pctx, 1, false);
-
- if (ctx->screen->gpu_id < 300)
- return;
-
- for (i = 1; i < ctx->screen->max_rts; i++) {
- ctx->blit_prog[i].vs = ctx->blit_prog[0].vs;
- ctx->blit_prog[i].fs = fd_prog_blit_fs(pctx, i + 1, false);
- }
-
- ctx->blit_z.vs = ctx->blit_prog[0].vs;
- ctx->blit_z.fs = fd_prog_blit_fs(pctx, 0, true);
- ctx->blit_zs.vs = ctx->blit_prog[0].vs;
- ctx->blit_zs.fs = fd_prog_blit_fs(pctx, 1, true);
+ struct fd_context *ctx = fd_context(pctx);
+ int i;
+
+ pctx->bind_vs_state = fd_vs_state_bind;
+ pctx->bind_tcs_state = fd_tcs_state_bind;
+ pctx->bind_tes_state = fd_tes_state_bind;
+ pctx->bind_gs_state = fd_gs_state_bind;
+ pctx->bind_fs_state = fd_fs_state_bind;
+
+ ctx->solid_prog.fs = assemble_tgsi(pctx, solid_fs, true);
+ ctx->solid_prog.vs = assemble_tgsi(pctx, solid_vs, false);
+
+ if (ctx->screen->gpu_id >= 600) {
+ ctx->solid_layered_prog.fs = assemble_tgsi(pctx, solid_fs, true);
+ ctx->solid_layered_prog.vs = util_make_layered_clear_vertex_shader(pctx);
+ }
+
+ if (ctx->screen->gpu_id >= 500)
+ return;
+
+ ctx->blit_prog[0].vs = fd_prog_blit_vs(pctx);
+ ctx->blit_prog[0].fs = fd_prog_blit_fs(pctx, 1, false);
+
+ if (ctx->screen->gpu_id < 300)
+ return;
+
+ for (i = 1; i < ctx->screen->max_rts; i++) {
+ ctx->blit_prog[i].vs = ctx->blit_prog[0].vs;
+ ctx->blit_prog[i].fs = fd_prog_blit_fs(pctx, i + 1, false);
+ }
+
+ ctx->blit_z.vs = ctx->blit_prog[0].vs;
+ ctx->blit_z.fs = fd_prog_blit_fs(pctx, 0, true);
+ ctx->blit_zs.vs = ctx->blit_prog[0].vs;
+ ctx->blit_zs.fs = fd_prog_blit_fs(pctx, 1, true);
}
-void fd_prog_fini(struct pipe_context *pctx)
+void
+fd_prog_fini(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
- int i;
+ struct fd_context *ctx = fd_context(pctx);
+ int i;
- pctx->delete_vs_state(pctx, ctx->solid_prog.vs);
- pctx->delete_fs_state(pctx, ctx->solid_prog.fs);
+ pctx->delete_vs_state(pctx, ctx->solid_prog.vs);
+ pctx->delete_fs_state(pctx, ctx->solid_prog.fs);
- if (ctx->screen->gpu_id >= 600) {
- pctx->delete_vs_state(pctx, ctx->solid_layered_prog.vs);
- pctx->delete_fs_state(pctx, ctx->solid_layered_prog.fs);
- }
+ if (ctx->screen->gpu_id >= 600) {
+ pctx->delete_vs_state(pctx, ctx->solid_layered_prog.vs);
+ pctx->delete_fs_state(pctx, ctx->solid_layered_prog.fs);
+ }
- if (ctx->screen->gpu_id >= 500)
- return;
+ if (ctx->screen->gpu_id >= 500)
+ return;
- pctx->delete_vs_state(pctx, ctx->blit_prog[0].vs);
- pctx->delete_fs_state(pctx, ctx->blit_prog[0].fs);
+ pctx->delete_vs_state(pctx, ctx->blit_prog[0].vs);
+ pctx->delete_fs_state(pctx, ctx->blit_prog[0].fs);
- if (ctx->screen->gpu_id < 300)
- return;
+ if (ctx->screen->gpu_id < 300)
+ return;
- for (i = 1; i < ctx->screen->max_rts; i++)
- pctx->delete_fs_state(pctx, ctx->blit_prog[i].fs);
- pctx->delete_fs_state(pctx, ctx->blit_z.fs);
- pctx->delete_fs_state(pctx, ctx->blit_zs.fs);
+ for (i = 1; i < ctx->screen->max_rts; i++)
+ pctx->delete_fs_state(pctx, ctx->blit_prog[i].fs);
+ pctx->delete_fs_state(pctx, ctx->blit_z.fs);
+ pctx->delete_fs_state(pctx, ctx->blit_zs.fs);
}
#include "pipe/p_state.h"
#include "util/u_memory.h"
+#include "freedreno_context.h"
#include "freedreno_query.h"
-#include "freedreno_query_sw.h"
#include "freedreno_query_hw.h"
-#include "freedreno_context.h"
+#include "freedreno_query_sw.h"
#include "freedreno_util.h"
/*
static struct pipe_query *
fd_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_query *q = NULL;
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_query *q = NULL;
- if (ctx->create_query)
- q = ctx->create_query(ctx, query_type, index);
- if (!q)
- q = fd_sw_create_query(ctx, query_type, index);
+ if (ctx->create_query)
+ q = ctx->create_query(ctx, query_type, index);
+ if (!q)
+ q = fd_sw_create_query(ctx, query_type, index);
- return (struct pipe_query *) q;
+ return (struct pipe_query *)q;
}
static void
-fd_destroy_query(struct pipe_context *pctx, struct pipe_query *pq)
- in_dt
+fd_destroy_query(struct pipe_context *pctx, struct pipe_query *pq) in_dt
{
- struct fd_query *q = fd_query(pq);
- q->funcs->destroy_query(fd_context(pctx), q);
+ struct fd_query *q = fd_query(pq);
+ q->funcs->destroy_query(fd_context(pctx), q);
}
static bool
-fd_begin_query(struct pipe_context *pctx, struct pipe_query *pq)
- in_dt
+fd_begin_query(struct pipe_context *pctx, struct pipe_query *pq) in_dt
{
- struct fd_query *q = fd_query(pq);
+ struct fd_query *q = fd_query(pq);
- q->funcs->begin_query(fd_context(pctx), q);
+ q->funcs->begin_query(fd_context(pctx), q);
- return true;
+ return true;
}
static bool
-fd_end_query(struct pipe_context *pctx, struct pipe_query *pq)
- in_dt
+fd_end_query(struct pipe_context *pctx, struct pipe_query *pq) in_dt
{
- struct fd_query *q = fd_query(pq);
+ struct fd_query *q = fd_query(pq);
- /* there are a couple special cases, which don't have
- * a matching ->begin_query():
- */
- if (skip_begin_query(q->type))
- fd_begin_query(pctx, pq);
+   /* there are a couple of special cases which don't have
+    * a matching ->begin_query():
+    */
+ if (skip_begin_query(q->type))
+ fd_begin_query(pctx, pq);
- q->funcs->end_query(fd_context(pctx), q);
+ q->funcs->end_query(fd_context(pctx), q);
- return true;
+ return true;
}
static bool
-fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq,
- bool wait, union pipe_query_result *result)
+fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq, bool wait,
+ union pipe_query_result *result)
{
- struct fd_query *q = fd_query(pq);
+ struct fd_query *q = fd_query(pq);
- util_query_clear_result(result, q->type);
+ util_query_clear_result(result, q->type);
- return q->funcs->get_query_result(fd_context(pctx), q, wait, result);
+ return q->funcs->get_query_result(fd_context(pctx), q, wait, result);
}
static void
fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq,
- bool condition, enum pipe_render_cond_flag mode)
- in_dt
+ bool condition, enum pipe_render_cond_flag mode) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->cond_query = pq;
- ctx->cond_cond = condition;
- ctx->cond_mode = mode;
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->cond_query = pq;
+ ctx->cond_cond = condition;
+ ctx->cond_mode = mode;
}
-#define _Q(_name, _query_type, _type, _result_type) { \
- .name = _name, \
- .query_type = _query_type, \
- .type = PIPE_DRIVER_QUERY_TYPE_ ## _type, \
- .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_ ## _result_type, \
- .group_id = ~(unsigned)0, \
-}
+#define _Q(_name, _query_type, _type, _result_type) \
+ { \
+ .name = _name, .query_type = _query_type, \
+ .type = PIPE_DRIVER_QUERY_TYPE_##_type, \
+ .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##_result_type, \
+ .group_id = ~(unsigned)0, \
+ }
-#define FQ(_name, _query_type, _type, _result_type) \
- _Q(_name, FD_QUERY_ ## _query_type, _type, _result_type)
+#define FQ(_name, _query_type, _type, _result_type) \
+ _Q(_name, FD_QUERY_##_query_type, _type, _result_type)
-#define PQ(_name, _query_type, _type, _result_type) \
- _Q(_name, PIPE_QUERY_ ## _query_type, _type, _result_type)
+#define PQ(_name, _query_type, _type, _result_type) \
+ _Q(_name, PIPE_QUERY_##_query_type, _type, _result_type)
static const struct pipe_driver_query_info sw_query_list[] = {
- FQ("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
- FQ("batches", BATCH_TOTAL, UINT64, AVERAGE),
- FQ("batches-sysmem", BATCH_SYSMEM, UINT64, AVERAGE),
- FQ("batches-gmem", BATCH_GMEM, UINT64, AVERAGE),
- FQ("batches-nondraw", BATCH_NONDRAW, UINT64, AVERAGE),
- FQ("restores", BATCH_RESTORE, UINT64, AVERAGE),
- PQ("prims-emitted", PRIMITIVES_EMITTED, UINT64, AVERAGE),
- FQ("staging", STAGING_UPLOADS, UINT64, AVERAGE),
- FQ("shadow", SHADOW_UPLOADS, UINT64, AVERAGE),
- FQ("vsregs", VS_REGS, FLOAT, AVERAGE),
- FQ("fsregs", FS_REGS, FLOAT, AVERAGE),
+ FQ("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
+ FQ("batches", BATCH_TOTAL, UINT64, AVERAGE),
+ FQ("batches-sysmem", BATCH_SYSMEM, UINT64, AVERAGE),
+ FQ("batches-gmem", BATCH_GMEM, UINT64, AVERAGE),
+ FQ("batches-nondraw", BATCH_NONDRAW, UINT64, AVERAGE),
+ FQ("restores", BATCH_RESTORE, UINT64, AVERAGE),
+ PQ("prims-emitted", PRIMITIVES_EMITTED, UINT64, AVERAGE),
+ FQ("staging", STAGING_UPLOADS, UINT64, AVERAGE),
+ FQ("shadow", SHADOW_UPLOADS, UINT64, AVERAGE),
+ FQ("vsregs", VS_REGS, FLOAT, AVERAGE),
+ FQ("fsregs", FS_REGS, FLOAT, AVERAGE),
};
static int
-fd_get_driver_query_info(struct pipe_screen *pscreen,
- unsigned index, struct pipe_driver_query_info *info)
+fd_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
+ struct pipe_driver_query_info *info)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- if (!info)
- return ARRAY_SIZE(sw_query_list) + screen->num_perfcntr_queries;
+ if (!info)
+ return ARRAY_SIZE(sw_query_list) + screen->num_perfcntr_queries;
- if (index >= ARRAY_SIZE(sw_query_list)) {
- index -= ARRAY_SIZE(sw_query_list);
- if (index >= screen->num_perfcntr_queries)
- return 0;
- *info = screen->perfcntr_queries[index];
- return 1;
- }
+ if (index >= ARRAY_SIZE(sw_query_list)) {
+ index -= ARRAY_SIZE(sw_query_list);
+ if (index >= screen->num_perfcntr_queries)
+ return 0;
+ *info = screen->perfcntr_queries[index];
+ return 1;
+ }
- *info = sw_query_list[index];
- return 1;
+ *info = sw_query_list[index];
+ return 1;
}
static int
fd_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index,
- struct pipe_driver_query_group_info *info)
+ struct pipe_driver_query_group_info *info)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- if (!info)
- return screen->num_perfcntr_groups;
+ if (!info)
+ return screen->num_perfcntr_groups;
- if (index >= screen->num_perfcntr_groups)
- return 0;
+ if (index >= screen->num_perfcntr_groups)
+ return 0;
- const struct fd_perfcntr_group *g = &screen->perfcntr_groups[index];
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[index];
- info->name = g->name;
- info->max_active_queries = g->num_counters;
- info->num_queries = g->num_countables;
+ info->name = g->name;
+ info->max_active_queries = g->num_counters;
+ info->num_queries = g->num_countables;
- return 1;
+ return 1;
}
static void
-fd_set_active_query_state(struct pipe_context *pctx, bool enable)
- assert_dt
+fd_set_active_query_state(struct pipe_context *pctx, bool enable) assert_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->active_queries = enable;
- ctx->update_active_queries = true;
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->active_queries = enable;
+ ctx->update_active_queries = true;
}
static enum pipe_driver_query_type
query_type(enum fd_perfcntr_type type)
{
-#define ENUM(t) case FD_PERFCNTR_ ## t: return PIPE_DRIVER_QUERY_ ## t
- switch (type) {
- ENUM(TYPE_UINT64);
- ENUM(TYPE_UINT);
- ENUM(TYPE_FLOAT);
- ENUM(TYPE_PERCENTAGE);
- ENUM(TYPE_BYTES);
- ENUM(TYPE_MICROSECONDS);
- ENUM(TYPE_HZ);
- ENUM(TYPE_DBM);
- ENUM(TYPE_TEMPERATURE);
- ENUM(TYPE_VOLTS);
- ENUM(TYPE_AMPS);
- ENUM(TYPE_WATTS);
- default:
- unreachable("bad type");
- return 0;
- }
+#define ENUM(t) \
+ case FD_PERFCNTR_##t: \
+ return PIPE_DRIVER_QUERY_##t
+ switch (type) {
+ ENUM(TYPE_UINT64);
+ ENUM(TYPE_UINT);
+ ENUM(TYPE_FLOAT);
+ ENUM(TYPE_PERCENTAGE);
+ ENUM(TYPE_BYTES);
+ ENUM(TYPE_MICROSECONDS);
+ ENUM(TYPE_HZ);
+ ENUM(TYPE_DBM);
+ ENUM(TYPE_TEMPERATURE);
+ ENUM(TYPE_VOLTS);
+ ENUM(TYPE_AMPS);
+ ENUM(TYPE_WATTS);
+ default:
+ unreachable("bad type");
+ return 0;
+ }
}
static enum pipe_driver_query_result_type
query_result_type(enum fd_perfcntr_result_type type)
{
- switch (type) {
- ENUM(RESULT_TYPE_AVERAGE);
- ENUM(RESULT_TYPE_CUMULATIVE);
- default:
- unreachable("bad type");
- return 0;
- }
+ switch (type) {
+ ENUM(RESULT_TYPE_AVERAGE);
+ ENUM(RESULT_TYPE_CUMULATIVE);
+ default:
+ unreachable("bad type");
+ return 0;
+ }
}
static void
setup_perfcntr_query_info(struct fd_screen *screen)
{
- unsigned num_queries = 0;
-
- for (unsigned i = 0; i < screen->num_perfcntr_groups; i++)
- num_queries += screen->perfcntr_groups[i].num_countables;
-
- screen->perfcntr_queries =
- calloc(num_queries, sizeof(screen->perfcntr_queries[0]));
- screen->num_perfcntr_queries = num_queries;
-
- unsigned idx = 0;
- for (unsigned i = 0; i < screen->num_perfcntr_groups; i++) {
- const struct fd_perfcntr_group *g = &screen->perfcntr_groups[i];
- for (unsigned j = 0; j < g->num_countables; j++) {
- struct pipe_driver_query_info *info =
- &screen->perfcntr_queries[idx];
- const struct fd_perfcntr_countable *c =
- &g->countables[j];
-
- info->name = c->name;
- info->query_type = FD_QUERY_FIRST_PERFCNTR + idx;
- info->type = query_type(c->query_type);
- info->result_type = query_result_type(c->result_type);
- info->group_id = i;
- info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
-
- idx++;
- }
- }
+ unsigned num_queries = 0;
+
+ for (unsigned i = 0; i < screen->num_perfcntr_groups; i++)
+ num_queries += screen->perfcntr_groups[i].num_countables;
+
+ screen->perfcntr_queries =
+ calloc(num_queries, sizeof(screen->perfcntr_queries[0]));
+ screen->num_perfcntr_queries = num_queries;
+
+ unsigned idx = 0;
+ for (unsigned i = 0; i < screen->num_perfcntr_groups; i++) {
+ const struct fd_perfcntr_group *g = &screen->perfcntr_groups[i];
+ for (unsigned j = 0; j < g->num_countables; j++) {
+ struct pipe_driver_query_info *info = &screen->perfcntr_queries[idx];
+ const struct fd_perfcntr_countable *c = &g->countables[j];
+
+ info->name = c->name;
+ info->query_type = FD_QUERY_FIRST_PERFCNTR + idx;
+ info->type = query_type(c->query_type);
+ info->result_type = query_result_type(c->result_type);
+ info->group_id = i;
+ info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
+
+ idx++;
+ }
+ }
}
void
fd_query_screen_init(struct pipe_screen *pscreen)
{
- pscreen->get_driver_query_info = fd_get_driver_query_info;
- pscreen->get_driver_query_group_info = fd_get_driver_query_group_info;
- setup_perfcntr_query_info(fd_screen(pscreen));
+ pscreen->get_driver_query_info = fd_get_driver_query_info;
+ pscreen->get_driver_query_group_info = fd_get_driver_query_group_info;
+ setup_perfcntr_query_info(fd_screen(pscreen));
}
void
fd_query_context_init(struct pipe_context *pctx)
{
- pctx->create_query = fd_create_query;
- pctx->destroy_query = fd_destroy_query;
- pctx->begin_query = fd_begin_query;
- pctx->end_query = fd_end_query;
- pctx->get_query_result = fd_get_query_result;
- pctx->set_active_query_state = fd_set_active_query_state;
- pctx->render_condition = fd_render_condition;
+ pctx->create_query = fd_create_query;
+ pctx->destroy_query = fd_destroy_query;
+ pctx->begin_query = fd_begin_query;
+ pctx->end_query = fd_end_query;
+ pctx->get_query_result = fd_get_query_result;
+ pctx->set_active_query_state = fd_set_active_query_state;
+ pctx->render_condition = fd_render_condition;
}
struct fd_query;
struct fd_query_funcs {
- void (*destroy_query)(struct fd_context *ctx,
- struct fd_query *q) dt;
- void (*begin_query)(struct fd_context *ctx, struct fd_query *q) dt;
- void (*end_query)(struct fd_context *ctx, struct fd_query *q) dt;
- bool (*get_query_result)(struct fd_context *ctx,
- struct fd_query *q, bool wait,
- union pipe_query_result *result);
+ void (*destroy_query)(struct fd_context *ctx, struct fd_query *q) dt;
+ void (*begin_query)(struct fd_context *ctx, struct fd_query *q) dt;
+ void (*end_query)(struct fd_context *ctx, struct fd_query *q) dt;
+ bool (*get_query_result)(struct fd_context *ctx, struct fd_query *q,
+ bool wait, union pipe_query_result *result);
};
struct fd_query {
- struct threaded_query base;
+ struct threaded_query base;
- const struct fd_query_funcs *funcs;
- int type;
- unsigned index;
+ const struct fd_query_funcs *funcs;
+ int type;
+ unsigned index;
};
static inline struct fd_query *
fd_query(struct pipe_query *pq)
{
- return (struct fd_query *)pq;
+ return (struct fd_query *)pq;
}
-#define FD_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define FD_QUERY_BATCH_TOTAL (PIPE_QUERY_DRIVER_SPECIFIC + 1) /* total # of batches (submits) */
-#define FD_QUERY_BATCH_SYSMEM (PIPE_QUERY_DRIVER_SPECIFIC + 2) /* batches using system memory (GMEM bypass) */
-#define FD_QUERY_BATCH_GMEM (PIPE_QUERY_DRIVER_SPECIFIC + 3) /* batches using GMEM */
-#define FD_QUERY_BATCH_NONDRAW (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* compute/blit batches */
-#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 5) /* batches requiring GMEM restore */
-#define FD_QUERY_STAGING_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 6) /* texture/buffer uploads using staging blit */
-#define FD_QUERY_SHADOW_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 7) /* texture/buffer uploads that shadowed rsc */
-#define FD_QUERY_VS_REGS (PIPE_QUERY_DRIVER_SPECIFIC + 8) /* avg # of VS registers (scaled up by 100x) */
-#define FD_QUERY_FS_REGS (PIPE_QUERY_DRIVER_SPECIFIC + 9) /* avg # of VS registers (scaled up by 100x) */
+#define FD_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
+#define FD_QUERY_BATCH_TOTAL \
+ (PIPE_QUERY_DRIVER_SPECIFIC + 1) /* total # of batches (submits) */
+#define FD_QUERY_BATCH_SYSMEM \
+ (PIPE_QUERY_DRIVER_SPECIFIC + \
+ 2) /* batches using system memory (GMEM bypass) */
+#define FD_QUERY_BATCH_GMEM \
+ (PIPE_QUERY_DRIVER_SPECIFIC + 3) /* batches using GMEM */
+#define FD_QUERY_BATCH_NONDRAW \
+ (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* compute/blit batches */
+#define FD_QUERY_BATCH_RESTORE \
+ (PIPE_QUERY_DRIVER_SPECIFIC + 5) /* batches requiring GMEM restore */
+#define FD_QUERY_STAGING_UPLOADS \
+ (PIPE_QUERY_DRIVER_SPECIFIC + \
+ 6) /* texture/buffer uploads using staging blit */
+#define FD_QUERY_SHADOW_UPLOADS \
+ (PIPE_QUERY_DRIVER_SPECIFIC + \
+ 7) /* texture/buffer uploads that shadowed rsc */
+#define FD_QUERY_VS_REGS \
+ (PIPE_QUERY_DRIVER_SPECIFIC + \
+ 8) /* avg # of VS registers (scaled up by 100x) */
+#define FD_QUERY_FS_REGS                                                      \
+   (PIPE_QUERY_DRIVER_SPECIFIC +                                              \
+    9) /* avg # of FS registers (scaled up by 100x) */
/* insert any new non-perfcntr queries here; the first perfcntr index
 * needs to come last!
*/
-#define FD_QUERY_FIRST_PERFCNTR (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define FD_QUERY_FIRST_PERFCNTR (PIPE_QUERY_DRIVER_SPECIFIC + 10)
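/* Editor's illustration (not part of the patch): in freedreno_query.c,
 * setup_perfcntr_query_info() assigns info->query_type =
 * FD_QUERY_FIRST_PERFCNTR + idx, so the numbering rule above keeps the
 * inverse mapping a simple subtraction.  The helper name below is
 * hypothetical:
 */
static inline unsigned
perfcntr_countable_idx(unsigned query_type)
{
   assert(query_type >= FD_QUERY_FIRST_PERFCNTR);
   return query_type - FD_QUERY_FIRST_PERFCNTR;
}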
void fd_query_screen_init(struct pipe_screen *pscreen);
void fd_query_context_init(struct pipe_context *pctx);
static inline bool
skip_begin_query(int type)
{
- switch (type) {
- case PIPE_QUERY_TIMESTAMP:
- case PIPE_QUERY_GPU_FINISHED:
- return true;
- default:
- return false;
- }
+ switch (type) {
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_GPU_FINISHED:
+ return true;
+ default:
+ return false;
+ }
}
/* maps query_type to sample provider idx: */
-static inline
-int pidx(unsigned query_type)
+static inline int
+pidx(unsigned query_type)
{
- switch (query_type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- return 0;
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- return 1;
- case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
- return 2;
- /* TODO currently queries only emitted in main pass (not in binning pass)..
- * which is fine for occlusion query, but pretty much not anything else.
- */
- case PIPE_QUERY_TIME_ELAPSED:
- return 3;
- case PIPE_QUERY_TIMESTAMP:
- return 4;
-
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- return 5;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- return 6;
-
- default:
- return -1;
- }
+ switch (query_type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ return 0;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ return 1;
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+ return 2;
+   /* TODO currently queries are only emitted in the main pass (not in the
+    * binning pass).. which is fine for occlusion queries, but pretty much
+    * nothing else.
+    */
+ case PIPE_QUERY_TIME_ELAPSED:
+ return 3;
+ case PIPE_QUERY_TIMESTAMP:
+ return 4;
+
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ return 5;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ return 6;
+
+ default:
+ return -1;
+ }
}
#endif /* FREEDRENO_QUERY_H_ */
* Rob Clark <robclark@freedesktop.org>
*/
-#include "util/u_memory.h"
#include "util/u_inlines.h"
+#include "util/u_memory.h"
-#include "freedreno_query_acc.h"
#include "freedreno_context.h"
+#include "freedreno_query_acc.h"
#include "freedreno_resource.h"
#include "freedreno_util.h"
static void
-fd_acc_destroy_query(struct fd_context *ctx, struct fd_query *q)
- assert_dt
+fd_acc_destroy_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
- struct fd_acc_query *aq = fd_acc_query(q);
+ struct fd_acc_query *aq = fd_acc_query(q);
- DBG("%p", q);
+ DBG("%p", q);
- pipe_resource_reference(&aq->prsc, NULL);
- list_del(&aq->node);
+ pipe_resource_reference(&aq->prsc, NULL);
+ list_del(&aq->node);
- free(aq->query_data);
- free(aq);
+ free(aq->query_data);
+ free(aq);
}
static void
realloc_query_bo(struct fd_context *ctx, struct fd_acc_query *aq)
{
- struct fd_resource *rsc;
- void *map;
+ struct fd_resource *rsc;
+ void *map;
- pipe_resource_reference(&aq->prsc, NULL);
+ pipe_resource_reference(&aq->prsc, NULL);
- aq->prsc = pipe_buffer_create(&ctx->screen->base,
- PIPE_BIND_QUERY_BUFFER, 0, 0x1000);
+ aq->prsc =
+ pipe_buffer_create(&ctx->screen->base, PIPE_BIND_QUERY_BUFFER, 0, 0x1000);
- /* don't assume the buffer is zero-initialized: */
- rsc = fd_resource(aq->prsc);
+ /* don't assume the buffer is zero-initialized: */
+ rsc = fd_resource(aq->prsc);
- fd_bo_cpu_prep(rsc->bo, ctx->pipe, DRM_FREEDRENO_PREP_WRITE);
+ fd_bo_cpu_prep(rsc->bo, ctx->pipe, DRM_FREEDRENO_PREP_WRITE);
- map = fd_bo_map(rsc->bo);
- memset(map, 0, aq->size);
- fd_bo_cpu_fini(rsc->bo);
+ map = fd_bo_map(rsc->bo);
+ memset(map, 0, aq->size);
+ fd_bo_cpu_fini(rsc->bo);
}
static void
-fd_acc_query_pause(struct fd_acc_query *aq)
- assert_dt
+fd_acc_query_pause(struct fd_acc_query *aq) assert_dt
{
- const struct fd_acc_sample_provider *p = aq->provider;
+ const struct fd_acc_sample_provider *p = aq->provider;
- if (!aq->batch)
- return;
+ if (!aq->batch)
+ return;
- p->pause(aq, aq->batch);
- aq->batch = NULL;
+ p->pause(aq, aq->batch);
+ aq->batch = NULL;
}
static void
-fd_acc_query_resume(struct fd_acc_query *aq, struct fd_batch *batch)
- assert_dt
+fd_acc_query_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
- const struct fd_acc_sample_provider *p = aq->provider;
+ const struct fd_acc_sample_provider *p = aq->provider;
- aq->batch = batch;
- p->resume(aq, aq->batch);
+ aq->batch = batch;
+ p->resume(aq, aq->batch);
- fd_screen_lock(batch->ctx->screen);
- fd_batch_resource_write(batch, fd_resource(aq->prsc));
- fd_screen_unlock(batch->ctx->screen);
+ fd_screen_lock(batch->ctx->screen);
+ fd_batch_resource_write(batch, fd_resource(aq->prsc));
+ fd_screen_unlock(batch->ctx->screen);
}
static void
-fd_acc_begin_query(struct fd_context *ctx, struct fd_query *q)
- assert_dt
+fd_acc_begin_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
- struct fd_acc_query *aq = fd_acc_query(q);
+ struct fd_acc_query *aq = fd_acc_query(q);
- DBG("%p", q);
+ DBG("%p", q);
- /* ->begin_query() discards previous results, so realloc bo: */
- realloc_query_bo(ctx, aq);
+ /* ->begin_query() discards previous results, so realloc bo: */
+ realloc_query_bo(ctx, aq);
- /* Signal that we need to update the active queries on the next draw */
- ctx->update_active_queries = true;
+ /* Signal that we need to update the active queries on the next draw */
+ ctx->update_active_queries = true;
- /* add to active list: */
- assert(list_is_empty(&aq->node));
- list_addtail(&aq->node, &ctx->acc_active_queries);
+ /* add to active list: */
+ assert(list_is_empty(&aq->node));
+ list_addtail(&aq->node, &ctx->acc_active_queries);
- /* TIMESTAMP/GPU_FINISHED and don't do normal bracketing at draw time, we
- * need to just emit the capture at this moment.
- */
- if (skip_begin_query(q->type)) {
- struct fd_batch *batch = fd_context_batch_locked(ctx);
- fd_acc_query_resume(aq, batch);
- fd_batch_unlock_submit(batch);
- fd_batch_reference(&batch, NULL);
- }
+   /* TIMESTAMP/GPU_FINISHED don't do normal bracketing at draw time; we
+    * need to just emit the capture at this moment.
+    */
+ if (skip_begin_query(q->type)) {
+ struct fd_batch *batch = fd_context_batch_locked(ctx);
+ fd_acc_query_resume(aq, batch);
+ fd_batch_unlock_submit(batch);
+ fd_batch_reference(&batch, NULL);
+ }
}
static void
-fd_acc_end_query(struct fd_context *ctx, struct fd_query *q)
- assert_dt
+fd_acc_end_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
- struct fd_acc_query *aq = fd_acc_query(q);
+ struct fd_acc_query *aq = fd_acc_query(q);
- DBG("%p", q);
+ DBG("%p", q);
- fd_acc_query_pause(aq);
+ fd_acc_query_pause(aq);
- /* remove from active list: */
- list_delinit(&aq->node);
+ /* remove from active list: */
+ list_delinit(&aq->node);
}
static bool
-fd_acc_get_query_result(struct fd_context *ctx, struct fd_query *q,
- bool wait, union pipe_query_result *result)
+fd_acc_get_query_result(struct fd_context *ctx, struct fd_query *q, bool wait,
+ union pipe_query_result *result)
{
- struct fd_acc_query *aq = fd_acc_query(q);
- const struct fd_acc_sample_provider *p = aq->provider;
- struct fd_resource *rsc = fd_resource(aq->prsc);
-
- DBG("%p: wait=%d", q, wait);
-
- assert(list_is_empty(&aq->node));
-
- /* if !wait, then check the last sample (the one most likely to
- * not be ready yet) and bail if it is not ready:
- */
- if (!wait) {
- int ret;
-
- if (pending(rsc, false)) {
- assert(!q->base.flushed);
- tc_assert_driver_thread(ctx->tc);
-
- /* piglit spec@arb_occlusion_query@occlusion_query_conform
- * test, and silly apps perhaps, get stuck in a loop trying
- * to get query result forever with wait==false.. we don't
- * wait to flush unnecessarily but we also don't want to
- * spin forever:
- */
- if (aq->no_wait_cnt++ > 5) {
- fd_context_access_begin(ctx);
- fd_batch_flush(rsc->track->write_batch);
- fd_context_access_end(ctx);
- }
- return false;
- }
-
- ret = fd_resource_wait(ctx, rsc,
- DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
- if (ret)
- return false;
-
- fd_bo_cpu_fini(rsc->bo);
- }
-
- if (rsc->track->write_batch) {
- tc_assert_driver_thread(ctx->tc);
- fd_context_access_begin(ctx);
- fd_batch_flush(rsc->track->write_batch);
- fd_context_access_end(ctx);
- }
-
- /* get the result: */
- fd_resource_wait(ctx, rsc, DRM_FREEDRENO_PREP_READ);
-
- void *ptr = fd_bo_map(rsc->bo);
- p->result(aq, ptr, result);
- fd_bo_cpu_fini(rsc->bo);
-
- return true;
+ struct fd_acc_query *aq = fd_acc_query(q);
+ const struct fd_acc_sample_provider *p = aq->provider;
+ struct fd_resource *rsc = fd_resource(aq->prsc);
+
+ DBG("%p: wait=%d", q, wait);
+
+ assert(list_is_empty(&aq->node));
+
+ /* if !wait, then check the last sample (the one most likely to
+ * not be ready yet) and bail if it is not ready:
+ */
+ if (!wait) {
+ int ret;
+
+ if (pending(rsc, false)) {
+ assert(!q->base.flushed);
+ tc_assert_driver_thread(ctx->tc);
+
+ /* piglit spec@arb_occlusion_query@occlusion_query_conform
+ * test, and silly apps perhaps, get stuck in a loop trying
+       * to get query result forever with wait==false.. we don't
+       * want to flush unnecessarily, but we also don't want to
+ * spin forever:
+ */
+ if (aq->no_wait_cnt++ > 5) {
+ fd_context_access_begin(ctx);
+ fd_batch_flush(rsc->track->write_batch);
+ fd_context_access_end(ctx);
+ }
+ return false;
+ }
+
+ ret = fd_resource_wait(
+ ctx, rsc, DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
+ if (ret)
+ return false;
+
+ fd_bo_cpu_fini(rsc->bo);
+ }
+
+ if (rsc->track->write_batch) {
+ tc_assert_driver_thread(ctx->tc);
+ fd_context_access_begin(ctx);
+ fd_batch_flush(rsc->track->write_batch);
+ fd_context_access_end(ctx);
+ }
+
+ /* get the result: */
+ fd_resource_wait(ctx, rsc, DRM_FREEDRENO_PREP_READ);
+
+ void *ptr = fd_bo_map(rsc->bo);
+ p->result(aq, ptr, result);
+ fd_bo_cpu_fini(rsc->bo);
+
+ return true;
}
static const struct fd_query_funcs acc_query_funcs = {
- .destroy_query = fd_acc_destroy_query,
- .begin_query = fd_acc_begin_query,
- .end_query = fd_acc_end_query,
- .get_query_result = fd_acc_get_query_result,
+ .destroy_query = fd_acc_destroy_query,
+ .begin_query = fd_acc_begin_query,
+ .end_query = fd_acc_end_query,
+ .get_query_result = fd_acc_get_query_result,
};
struct fd_query *
fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
- unsigned index, const struct fd_acc_sample_provider *provider)
+ unsigned index,
+ const struct fd_acc_sample_provider *provider)
{
- struct fd_acc_query *aq;
- struct fd_query *q;
+ struct fd_acc_query *aq;
+ struct fd_query *q;
- aq = CALLOC_STRUCT(fd_acc_query);
- if (!aq)
- return NULL;
+ aq = CALLOC_STRUCT(fd_acc_query);
+ if (!aq)
+ return NULL;
- DBG("%p: query_type=%u", aq, query_type);
+ DBG("%p: query_type=%u", aq, query_type);
- aq->provider = provider;
- aq->size = provider->size;
+ aq->provider = provider;
+ aq->size = provider->size;
- list_inithead(&aq->node);
+ list_inithead(&aq->node);
- q = &aq->base;
- q->funcs = &acc_query_funcs;
- q->type = query_type;
- q->index = index;
+ q = &aq->base;
+ q->funcs = &acc_query_funcs;
+ q->type = query_type;
+ q->index = index;
- return q;
+ return q;
}
struct fd_query *
-fd_acc_create_query(struct fd_context *ctx, unsigned query_type,
- unsigned index)
+fd_acc_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
{
- int idx = pidx(query_type);
+ int idx = pidx(query_type);
- if ((idx < 0) || !ctx->acc_sample_providers[idx])
- return NULL;
+ if ((idx < 0) || !ctx->acc_sample_providers[idx])
+ return NULL;
- return fd_acc_create_query2(ctx, query_type, index,
- ctx->acc_sample_providers[idx]);
+ return fd_acc_create_query2(ctx, query_type, index,
+ ctx->acc_sample_providers[idx]);
}
/* Called at clear/draw/blit time to enable/disable the appropriate queries in
void
fd_acc_query_update_batch(struct fd_batch *batch, bool disable_all)
{
- struct fd_context *ctx = batch->ctx;
-
- if (disable_all || ctx->update_active_queries) {
- struct fd_acc_query *aq;
- LIST_FOR_EACH_ENTRY(aq, &ctx->acc_active_queries, node) {
- bool batch_change = aq->batch != batch;
- bool was_active = aq->batch != NULL;
- bool now_active = !disable_all &&
- (ctx->active_queries || aq->provider->always);
-
- if (was_active && (!now_active || batch_change))
- fd_acc_query_pause(aq);
- if (now_active && (!was_active || batch_change))
- fd_acc_query_resume(aq, batch);
- }
- }
-
- ctx->update_active_queries = false;
+ struct fd_context *ctx = batch->ctx;
+
+ if (disable_all || ctx->update_active_queries) {
+ struct fd_acc_query *aq;
+ LIST_FOR_EACH_ENTRY (aq, &ctx->acc_active_queries, node) {
+ bool batch_change = aq->batch != batch;
+ bool was_active = aq->batch != NULL;
+ bool now_active =
+ !disable_all && (ctx->active_queries || aq->provider->always);
+
+ if (was_active && (!now_active || batch_change))
+ fd_acc_query_pause(aq);
+ if (now_active && (!was_active || batch_change))
+ fd_acc_query_resume(aq, batch);
+ }
+ }
+
+ ctx->update_active_queries = false;
}
void
fd_acc_query_register_provider(struct pipe_context *pctx,
- const struct fd_acc_sample_provider *provider)
+ const struct fd_acc_sample_provider *provider)
{
- struct fd_context *ctx = fd_context(pctx);
- int idx = pidx(provider->query_type);
+ struct fd_context *ctx = fd_context(pctx);
+ int idx = pidx(provider->query_type);
- assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
- assert(!ctx->acc_sample_providers[idx]);
+ assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
+ assert(!ctx->acc_sample_providers[idx]);
- ctx->acc_sample_providers[idx] = provider;
+ ctx->acc_sample_providers[idx] = provider;
}
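/* Usage sketch (editor's illustration, not part of the patch): a generation
 * backend registers one accumulated-query provider per query type at context
 * init.  The provider and callback names below are hypothetical; the struct
 * fields match the fd_acc_sample_provider definition that follows:
 */
static void example_resume(struct fd_acc_query *aq, struct fd_batch *batch);
static void example_pause(struct fd_acc_query *aq, struct fd_batch *batch);
static void example_result(struct fd_acc_query *aq, void *buf,
                           union pipe_query_result *result);

static const struct fd_acc_sample_provider example_occlusion_provider = {
   .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
   .size = sizeof(uint64_t),
   .resume = example_resume, /* hypothetical: emit cmds to start counting */
   .pause = example_pause,   /* hypothetical: emit cmds to stop counting */
   .result = example_result, /* hypothetical: decode buf into *result */
};

static void
example_query_context_init(struct pipe_context *pctx)
{
   fd_acc_query_register_provider(pctx, &example_occlusion_provider);
}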
#include "util/list.h"
-#include "freedreno_query.h"
#include "freedreno_context.h"
-
+#include "freedreno_query.h"
/*
* Accumulated HW Queries:
* span multiple batches, etc.
*/
-
struct fd_acc_query;
struct fd_acc_sample_provider {
- unsigned query_type;
+ unsigned query_type;
- /* Set if the provider should still count while !ctx->active_queries */
- bool always;
+ /* Set if the provider should still count while !ctx->active_queries */
+ bool always;
- unsigned size;
+ unsigned size;
- void (*resume)(struct fd_acc_query *aq, struct fd_batch *batch) dt;
- void (*pause)(struct fd_acc_query *aq, struct fd_batch *batch) dt;
+ void (*resume)(struct fd_acc_query *aq, struct fd_batch *batch) dt;
+ void (*pause)(struct fd_acc_query *aq, struct fd_batch *batch) dt;
- void (*result)(struct fd_acc_query *aq, void *buf,
- union pipe_query_result *result);
+ void (*result)(struct fd_acc_query *aq, void *buf,
+ union pipe_query_result *result);
};
struct fd_acc_query {
- struct fd_query base;
+ struct fd_query base;
- const struct fd_acc_sample_provider *provider;
+ const struct fd_acc_sample_provider *provider;
- struct pipe_resource *prsc;
+ struct pipe_resource *prsc;
- /* Pointer to the batch that our query has had resume() called on (if
- * any).
- */
- struct fd_batch *batch;
+ /* Pointer to the batch that our query has had resume() called on (if
+ * any).
+ */
+ struct fd_batch *batch;
- /* usually the same as provider->size but for batch queries we
- * need to calculate the size dynamically when the query is
- * allocated:
- */
- unsigned size;
+ /* usually the same as provider->size but for batch queries we
+ * need to calculate the size dynamically when the query is
+ * allocated:
+ */
+ unsigned size;
- struct list_head node; /* list-node in ctx->active_acc_queries */
+   struct list_head node; /* list-node in ctx->acc_active_queries */
- int no_wait_cnt; /* see fd_acc_get_query_result() */
+ int no_wait_cnt; /* see fd_acc_get_query_result() */
- void *query_data; /* query specific data */
+ void *query_data; /* query specific data */
};
static inline struct fd_acc_query *
fd_acc_query(struct fd_query *q)
{
- return (struct fd_acc_query *)q;
+ return (struct fd_acc_query *)q;
}
-struct fd_query * fd_acc_create_query(struct fd_context *ctx, unsigned query_type,
- unsigned index);
-struct fd_query * fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
- unsigned index, const struct fd_acc_sample_provider *provider);
-void fd_acc_query_update_batch(struct fd_batch *batch, bool disable_all) assert_dt;
-void fd_acc_query_register_provider(struct pipe_context *pctx,
- const struct fd_acc_sample_provider *provider);
+struct fd_query *fd_acc_create_query(struct fd_context *ctx,
+ unsigned query_type, unsigned index);
+struct fd_query *
+fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
+ unsigned index,
+ const struct fd_acc_sample_provider *provider);
+void fd_acc_query_update_batch(struct fd_batch *batch,
+ bool disable_all) assert_dt;
+void
+fd_acc_query_register_provider(struct pipe_context *pctx,
+ const struct fd_acc_sample_provider *provider);
#endif /* FREEDRENO_QUERY_ACC_H_ */
*/
#include "pipe/p_state.h"
-#include "util/u_memory.h"
#include "util/u_inlines.h"
+#include "util/u_memory.h"
-#include "freedreno_query_hw.h"
#include "freedreno_context.h"
+#include "freedreno_query_hw.h"
#include "freedreno_resource.h"
#include "freedreno_util.h"
struct fd_hw_sample_period {
- struct fd_hw_sample *start, *end;
- struct list_head list;
+ struct fd_hw_sample *start, *end;
+ struct list_head list;
};
static struct fd_hw_sample *
get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring,
- unsigned query_type)
- assert_dt
+ unsigned query_type) assert_dt
{
- struct fd_context *ctx = batch->ctx;
- struct fd_hw_sample *samp = NULL;
- int idx = pidx(query_type);
+ struct fd_context *ctx = batch->ctx;
+ struct fd_hw_sample *samp = NULL;
+ int idx = pidx(query_type);
- assume(idx >= 0); /* query never would have been created otherwise */
+ assume(idx >= 0); /* query never would have been created otherwise */
- if (!batch->sample_cache[idx]) {
- struct fd_hw_sample *new_samp =
- ctx->hw_sample_providers[idx]->get_sample(batch, ring);
- fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
- util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
- batch->needs_flush = true;
- }
+ if (!batch->sample_cache[idx]) {
+ struct fd_hw_sample *new_samp =
+ ctx->hw_sample_providers[idx]->get_sample(batch, ring);
+ fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
+ util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
+ batch->needs_flush = true;
+ }
- fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);
+ fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);
- return samp;
+ return samp;
}
static void
clear_sample_cache(struct fd_batch *batch)
{
- int i;
+ int i;
- for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
- fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
+ for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
+ fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
}
static bool
query_active_in_batch(struct fd_batch *batch, struct fd_hw_query *hq)
{
- int idx = pidx(hq->provider->query_type);
- return batch->query_providers_active & (1 << idx);
+ int idx = pidx(hq->provider->query_type);
+ return batch->query_providers_active & (1 << idx);
}
static void
resume_query(struct fd_batch *batch, struct fd_hw_query *hq,
- struct fd_ringbuffer *ring)
- assert_dt
+ struct fd_ringbuffer *ring) assert_dt
{
- int idx = pidx(hq->provider->query_type);
- DBG("%p", hq);
- assert(idx >= 0); /* query never would have been created otherwise */
- assert(!hq->period);
- batch->query_providers_used |= (1 << idx);
- batch->query_providers_active |= (1 << idx);
- hq->period = slab_alloc_st(&batch->ctx->sample_period_pool);
- list_inithead(&hq->period->list);
- hq->period->start = get_sample(batch, ring, hq->base.type);
- /* NOTE: slab_alloc_st() does not zero out the buffer: */
- hq->period->end = NULL;
+ int idx = pidx(hq->provider->query_type);
+ DBG("%p", hq);
+ assert(idx >= 0); /* query never would have been created otherwise */
+ assert(!hq->period);
+ batch->query_providers_used |= (1 << idx);
+ batch->query_providers_active |= (1 << idx);
+ hq->period = slab_alloc_st(&batch->ctx->sample_period_pool);
+ list_inithead(&hq->period->list);
+ hq->period->start = get_sample(batch, ring, hq->base.type);
+ /* NOTE: slab_alloc_st() does not zero out the buffer: */
+ hq->period->end = NULL;
}
static void
pause_query(struct fd_batch *batch, struct fd_hw_query *hq,
- struct fd_ringbuffer *ring)
- assert_dt
+ struct fd_ringbuffer *ring) assert_dt
{
- ASSERTED int idx = pidx(hq->provider->query_type);
- DBG("%p", hq);
- assert(idx >= 0); /* query never would have been created otherwise */
- assert(hq->period && !hq->period->end);
- assert(query_active_in_batch(batch, hq));
- batch->query_providers_active &= ~(1 << idx);
- hq->period->end = get_sample(batch, ring, hq->base.type);
- list_addtail(&hq->period->list, &hq->periods);
- hq->period = NULL;
+ ASSERTED int idx = pidx(hq->provider->query_type);
+ DBG("%p", hq);
+ assert(idx >= 0); /* query never would have been created otherwise */
+ assert(hq->period && !hq->period->end);
+ assert(query_active_in_batch(batch, hq));
+ batch->query_providers_active &= ~(1 << idx);
+ hq->period->end = get_sample(batch, ring, hq->base.type);
+ list_addtail(&hq->period->list, &hq->periods);
+ hq->period = NULL;
}
static void
destroy_periods(struct fd_context *ctx, struct fd_hw_query *hq)
{
- struct fd_hw_sample_period *period, *s;
- LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->periods, list) {
- fd_hw_sample_reference(ctx, &period->start, NULL);
- fd_hw_sample_reference(ctx, &period->end, NULL);
- list_del(&period->list);
- slab_free_st(&ctx->sample_period_pool, period);
- }
+ struct fd_hw_sample_period *period, *s;
+ LIST_FOR_EACH_ENTRY_SAFE (period, s, &hq->periods, list) {
+ fd_hw_sample_reference(ctx, &period->start, NULL);
+ fd_hw_sample_reference(ctx, &period->end, NULL);
+ list_del(&period->list);
+ slab_free_st(&ctx->sample_period_pool, period);
+ }
}
static void
fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
{
- struct fd_hw_query *hq = fd_hw_query(q);
+ struct fd_hw_query *hq = fd_hw_query(q);
- DBG("%p", q);
+ DBG("%p", q);
- destroy_periods(ctx, hq);
- list_del(&hq->list);
+ destroy_periods(ctx, hq);
+ list_del(&hq->list);
- free(hq);
+ free(hq);
}
static void
-fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
- assert_dt
+fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
- struct fd_batch *batch = fd_context_batch_locked(ctx);
- struct fd_hw_query *hq = fd_hw_query(q);
+ struct fd_batch *batch = fd_context_batch_locked(ctx);
+ struct fd_hw_query *hq = fd_hw_query(q);
- DBG("%p", q);
+ DBG("%p", q);
- /* begin_query() should clear previous results: */
- destroy_periods(ctx, hq);
+ /* begin_query() should clear previous results: */
+ destroy_periods(ctx, hq);
- if (batch && (ctx->active_queries || hq->provider->always))
- resume_query(batch, hq, batch->draw);
+ if (batch && (ctx->active_queries || hq->provider->always))
+ resume_query(batch, hq, batch->draw);
- /* add to active list: */
- assert(list_is_empty(&hq->list));
- list_addtail(&hq->list, &ctx->hw_active_queries);
+ /* add to active list: */
+ assert(list_is_empty(&hq->list));
+ list_addtail(&hq->list, &ctx->hw_active_queries);
- fd_batch_unlock_submit(batch);
- fd_batch_reference(&batch, NULL);
+ fd_batch_unlock_submit(batch);
+ fd_batch_reference(&batch, NULL);
}
static void
-fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
- assert_dt
+fd_hw_end_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
- struct fd_batch *batch = fd_context_batch_locked(ctx);
- struct fd_hw_query *hq = fd_hw_query(q);
+ struct fd_batch *batch = fd_context_batch_locked(ctx);
+ struct fd_hw_query *hq = fd_hw_query(q);
- DBG("%p", q);
+ DBG("%p", q);
- if (batch && (ctx->active_queries || hq->provider->always))
- pause_query(batch, hq, batch->draw);
+ if (batch && (ctx->active_queries || hq->provider->always))
+ pause_query(batch, hq, batch->draw);
- /* remove from active list: */
- list_delinit(&hq->list);
+ /* remove from active list: */
+ list_delinit(&hq->list);
- fd_batch_unlock_submit(batch);
- fd_batch_reference(&batch, NULL);
+ fd_batch_unlock_submit(batch);
+ fd_batch_reference(&batch, NULL);
}
/* helper to get ptr to specified sample: */
-static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
+static void *
+sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
{
- return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
+ return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
}
static bool
-fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
- bool wait, union pipe_query_result *result)
+fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q, bool wait,
+ union pipe_query_result *result)
{
- struct fd_hw_query *hq = fd_hw_query(q);
- const struct fd_hw_sample_provider *p = hq->provider;
- struct fd_hw_sample_period *period;
-
- DBG("%p: wait=%d", q, wait);
-
- if (list_is_empty(&hq->periods))
- return true;
-
- assert(list_is_empty(&hq->list));
- assert(!hq->period);
-
- /* if !wait, then check the last sample (the one most likely to
- * not be ready yet) and bail if it is not ready:
- */
- if (!wait) {
- int ret;
-
- period = LIST_ENTRY(struct fd_hw_sample_period,
- hq->periods.prev, list);
-
- struct fd_resource *rsc = fd_resource(period->end->prsc);
-
- if (pending(rsc, false)) {
- assert(!q->base.flushed);
- tc_assert_driver_thread(ctx->tc);
-
- /* piglit spec@arb_occlusion_query@occlusion_query_conform
- * test, and silly apps perhaps, get stuck in a loop trying
- * to get query result forever with wait==false.. we don't
- * wait to flush unnecessarily but we also don't want to
- * spin forever:
- */
- if (hq->no_wait_cnt++ > 5) {
- fd_context_access_begin(ctx);
- fd_batch_flush(rsc->track->write_batch);
- fd_context_access_end(ctx);
- }
- return false;
- }
-
- if (!rsc->bo)
- return false;
-
- ret = fd_resource_wait(ctx, rsc,
- DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
- if (ret)
- return false;
-
- fd_bo_cpu_fini(rsc->bo);
- }
-
- /* sum the result across all sample periods: */
- LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
- struct fd_hw_sample *start = period->start;
- ASSERTED struct fd_hw_sample *end = period->end;
- unsigned i;
-
- /* start and end samples should be from same batch: */
- assert(start->prsc == end->prsc);
- assert(start->num_tiles == end->num_tiles);
-
- struct fd_resource *rsc = fd_resource(start->prsc);
-
- if (rsc->track->write_batch) {
- tc_assert_driver_thread(ctx->tc);
- fd_context_access_begin(ctx);
- fd_batch_flush(rsc->track->write_batch);
- fd_context_access_end(ctx);
- }
-
- /* some piglit tests at least do query with no draws, I guess: */
- if (!rsc->bo)
- continue;
-
- fd_resource_wait(ctx, rsc, DRM_FREEDRENO_PREP_READ);
-
- void *ptr = fd_bo_map(rsc->bo);
-
- for (i = 0; i < start->num_tiles; i++) {
- p->accumulate_result(ctx, sampptr(period->start, i, ptr),
- sampptr(period->end, i, ptr), result);
- }
-
- fd_bo_cpu_fini(rsc->bo);
- }
-
- return true;
+ struct fd_hw_query *hq = fd_hw_query(q);
+ const struct fd_hw_sample_provider *p = hq->provider;
+ struct fd_hw_sample_period *period;
+
+ DBG("%p: wait=%d", q, wait);
+
+ if (list_is_empty(&hq->periods))
+ return true;
+
+ assert(list_is_empty(&hq->list));
+ assert(!hq->period);
+
+ /* if !wait, then check the last sample (the one most likely to
+ * not be ready yet) and bail if it is not ready:
+ */
+ if (!wait) {
+ int ret;
+
+ period = LIST_ENTRY(struct fd_hw_sample_period, hq->periods.prev, list);
+
+ struct fd_resource *rsc = fd_resource(period->end->prsc);
+
+ if (pending(rsc, false)) {
+ assert(!q->base.flushed);
+ tc_assert_driver_thread(ctx->tc);
+
+ /* piglit spec@arb_occlusion_query@occlusion_query_conform
+ * test, and silly apps perhaps, get stuck in a loop trying
+       * to get query result forever with wait==false.. we don't
+       * want to flush unnecessarily, but we also don't want to
+ * spin forever:
+ */
+ if (hq->no_wait_cnt++ > 5) {
+ fd_context_access_begin(ctx);
+ fd_batch_flush(rsc->track->write_batch);
+ fd_context_access_end(ctx);
+ }
+ return false;
+ }
+
+ if (!rsc->bo)
+ return false;
+
+ ret = fd_resource_wait(
+ ctx, rsc, DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
+ if (ret)
+ return false;
+
+ fd_bo_cpu_fini(rsc->bo);
+ }
+
+ /* sum the result across all sample periods: */
+ LIST_FOR_EACH_ENTRY (period, &hq->periods, list) {
+ struct fd_hw_sample *start = period->start;
+ ASSERTED struct fd_hw_sample *end = period->end;
+ unsigned i;
+
+ /* start and end samples should be from same batch: */
+ assert(start->prsc == end->prsc);
+ assert(start->num_tiles == end->num_tiles);
+
+ struct fd_resource *rsc = fd_resource(start->prsc);
+
+ if (rsc->track->write_batch) {
+ tc_assert_driver_thread(ctx->tc);
+ fd_context_access_begin(ctx);
+ fd_batch_flush(rsc->track->write_batch);
+ fd_context_access_end(ctx);
+ }
+
+ /* some piglit tests at least do query with no draws, I guess: */
+ if (!rsc->bo)
+ continue;
+
+ fd_resource_wait(ctx, rsc, DRM_FREEDRENO_PREP_READ);
+
+ void *ptr = fd_bo_map(rsc->bo);
+
+ for (i = 0; i < start->num_tiles; i++) {
+ p->accumulate_result(ctx, sampptr(period->start, i, ptr),
+ sampptr(period->end, i, ptr), result);
+ }
+
+ fd_bo_cpu_fini(rsc->bo);
+ }
+
+ return true;
}
static const struct fd_query_funcs hw_query_funcs = {
- .destroy_query = fd_hw_destroy_query,
- .begin_query = fd_hw_begin_query,
- .end_query = fd_hw_end_query,
- .get_query_result = fd_hw_get_query_result,
+ .destroy_query = fd_hw_destroy_query,
+ .begin_query = fd_hw_begin_query,
+ .end_query = fd_hw_end_query,
+ .get_query_result = fd_hw_get_query_result,
};
struct fd_query *
fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
{
- struct fd_hw_query *hq;
- struct fd_query *q;
- int idx = pidx(query_type);
+ struct fd_hw_query *hq;
+ struct fd_query *q;
+ int idx = pidx(query_type);
- if ((idx < 0) || !ctx->hw_sample_providers[idx])
- return NULL;
+ if ((idx < 0) || !ctx->hw_sample_providers[idx])
+ return NULL;
- hq = CALLOC_STRUCT(fd_hw_query);
- if (!hq)
- return NULL;
+ hq = CALLOC_STRUCT(fd_hw_query);
+ if (!hq)
+ return NULL;
- DBG("%p: query_type=%u", hq, query_type);
+ DBG("%p: query_type=%u", hq, query_type);
- hq->provider = ctx->hw_sample_providers[idx];
+ hq->provider = ctx->hw_sample_providers[idx];
- list_inithead(&hq->periods);
- list_inithead(&hq->list);
+ list_inithead(&hq->periods);
+ list_inithead(&hq->list);
- q = &hq->base;
- q->funcs = &hw_query_funcs;
- q->type = query_type;
- q->index = index;
+ q = &hq->base;
+ q->funcs = &hw_query_funcs;
+ q->type = query_type;
+ q->index = index;
- return q;
+ return q;
}
struct fd_hw_sample *
fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
{
- struct fd_hw_sample *samp = slab_alloc_st(&batch->ctx->sample_pool);
- pipe_reference_init(&samp->reference, 1);
- samp->size = size;
- debug_assert(util_is_power_of_two_or_zero(size));
- batch->next_sample_offset = align(batch->next_sample_offset, size);
- samp->offset = batch->next_sample_offset;
- /* NOTE: slab_alloc_st() does not zero out the buffer: */
- samp->prsc = NULL;
- samp->num_tiles = 0;
- samp->tile_stride = 0;
- batch->next_sample_offset += size;
-
- if (!batch->query_buf) {
- struct pipe_screen *pscreen = &batch->ctx->screen->base;
- struct pipe_resource templ = {
- .target = PIPE_BUFFER,
- .format = PIPE_FORMAT_R8_UNORM,
- .bind = PIPE_BIND_QUERY_BUFFER,
- .width0 = 0, /* create initially zero size buffer */
- .height0 = 1,
- .depth0 = 1,
- .array_size = 1,
- .last_level = 0,
- .nr_samples = 1,
- };
- batch->query_buf = pscreen->resource_create(pscreen, &templ);
- }
-
- pipe_resource_reference(&samp->prsc, batch->query_buf);
-
- return samp;
+ struct fd_hw_sample *samp = slab_alloc_st(&batch->ctx->sample_pool);
+ pipe_reference_init(&samp->reference, 1);
+ samp->size = size;
+ debug_assert(util_is_power_of_two_or_zero(size));
+ batch->next_sample_offset = align(batch->next_sample_offset, size);
+ samp->offset = batch->next_sample_offset;
+ /* NOTE: slab_alloc_st() does not zero out the buffer: */
+ samp->prsc = NULL;
+ samp->num_tiles = 0;
+ samp->tile_stride = 0;
+ batch->next_sample_offset += size;
+
+ if (!batch->query_buf) {
+ struct pipe_screen *pscreen = &batch->ctx->screen->base;
+ struct pipe_resource templ = {
+ .target = PIPE_BUFFER,
+ .format = PIPE_FORMAT_R8_UNORM,
+ .bind = PIPE_BIND_QUERY_BUFFER,
+ .width0 = 0, /* create initially zero size buffer */
+ .height0 = 1,
+ .depth0 = 1,
+ .array_size = 1,
+ .last_level = 0,
+ .nr_samples = 1,
+ };
+ batch->query_buf = pscreen->resource_create(pscreen, &templ);
+ }
+
+ pipe_resource_reference(&samp->prsc, batch->query_buf);
+
+ return samp;
}
void
__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
{
- pipe_resource_reference(&samp->prsc, NULL);
- slab_free_st(&ctx->sample_pool, samp);
+ pipe_resource_reference(&samp->prsc, NULL);
+ slab_free_st(&ctx->sample_pool, samp);
}
/* called from gmem code once total storage requirements are known (ie.
void
fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
{
- uint32_t tile_stride = batch->next_sample_offset;
+ uint32_t tile_stride = batch->next_sample_offset;
- if (tile_stride > 0)
- fd_resource_resize(batch->query_buf, tile_stride * num_tiles);
+ if (tile_stride > 0)
+ fd_resource_resize(batch->query_buf, tile_stride * num_tiles);
- batch->query_tile_stride = tile_stride;
+ batch->query_tile_stride = tile_stride;
- while (batch->samples.size > 0) {
- struct fd_hw_sample *samp =
- util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
- samp->num_tiles = num_tiles;
- samp->tile_stride = tile_stride;
- fd_hw_sample_reference(batch->ctx, &samp, NULL);
- }
+ while (batch->samples.size > 0) {
+ struct fd_hw_sample *samp =
+ util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
+ samp->num_tiles = num_tiles;
+ samp->tile_stride = tile_stride;
+ fd_hw_sample_reference(batch->ctx, &samp, NULL);
+ }
- /* reset things for next batch: */
- batch->next_sample_offset = 0;
+ /* reset things for next batch: */
+ batch->next_sample_offset = 0;
}
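/* Worked example (editor's note, not part of the patch): suppose a batch
 * allocated two 4-byte samples, so next_sample_offset ended up at 8.  For
 * num_tiles = 16, fd_hw_query_prepare() resizes query_buf to 8 * 16 = 128
 * bytes, and the sample with offset 4 in tile 3 lands at 8*3 + 4 = 28 --
 * which is exactly the address sampptr() computes:
 *
 *   void *p = sampptr(samp, 3, map); // map + samp->tile_stride*3 + samp->offset
 */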
void
fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
- struct fd_ringbuffer *ring)
+ struct fd_ringbuffer *ring)
{
- uint32_t tile_stride = batch->query_tile_stride;
- uint32_t offset = tile_stride * n;
+ uint32_t tile_stride = batch->query_tile_stride;
+ uint32_t offset = tile_stride * n;
- /* bail if no queries: */
- if (tile_stride == 0)
- return;
+ /* bail if no queries: */
+ if (tile_stride == 0)
+ return;
- fd_wfi(batch, ring);
- OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
- OUT_RELOC(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
+ fd_wfi(batch, ring);
+ OUT_PKT0(ring, HW_QUERY_BASE_REG, 1);
+ OUT_RELOC(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
}
void
fd_hw_query_update_batch(struct fd_batch *batch, bool disable_all)
{
- struct fd_context *ctx = batch->ctx;
-
- if (disable_all || ctx->update_active_queries) {
- struct fd_hw_query *hq;
- LIST_FOR_EACH_ENTRY(hq, &batch->ctx->hw_active_queries, list) {
- bool was_active = query_active_in_batch(batch, hq);
- bool now_active = !disable_all &&
- (ctx->active_queries || hq->provider->always);
-
- if (now_active && !was_active)
- resume_query(batch, hq, batch->draw);
- else if (was_active && !now_active)
- pause_query(batch, hq, batch->draw);
- }
- }
- clear_sample_cache(batch);
+ struct fd_context *ctx = batch->ctx;
+
+ if (disable_all || ctx->update_active_queries) {
+ struct fd_hw_query *hq;
+ LIST_FOR_EACH_ENTRY (hq, &batch->ctx->hw_active_queries, list) {
+ bool was_active = query_active_in_batch(batch, hq);
+ bool now_active =
+ !disable_all && (ctx->active_queries || hq->provider->always);
+
+ if (now_active && !was_active)
+ resume_query(batch, hq, batch->draw);
+ else if (was_active && !now_active)
+ pause_query(batch, hq, batch->draw);
+ }
+ }
+ clear_sample_cache(batch);
}
/* call the provider->enable() for all the hw queries that were active
void
fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
- struct fd_context *ctx = batch->ctx;
- for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
- if (batch->query_providers_used & (1 << idx)) {
- assert(ctx->hw_sample_providers[idx]);
- if (ctx->hw_sample_providers[idx]->enable)
- ctx->hw_sample_providers[idx]->enable(ctx, ring);
- }
- }
+ struct fd_context *ctx = batch->ctx;
+ for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
+ if (batch->query_providers_used & (1 << idx)) {
+ assert(ctx->hw_sample_providers[idx]);
+ if (ctx->hw_sample_providers[idx]->enable)
+ ctx->hw_sample_providers[idx]->enable(ctx, ring);
+ }
+ }
}
void
fd_hw_query_register_provider(struct pipe_context *pctx,
- const struct fd_hw_sample_provider *provider)
+ const struct fd_hw_sample_provider *provider)
{
- struct fd_context *ctx = fd_context(pctx);
- int idx = pidx(provider->query_type);
+ struct fd_context *ctx = fd_context(pctx);
+ int idx = pidx(provider->query_type);
- assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
- assert(!ctx->hw_sample_providers[idx]);
+ assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
+ assert(!ctx->hw_sample_providers[idx]);
- ctx->hw_sample_providers[idx] = provider;
+ ctx->hw_sample_providers[idx] = provider;
}
void
fd_hw_query_init(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
- 16);
- slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
- 16);
+ slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample), 16);
+ slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
+ 16);
}
void
fd_hw_query_fini(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- slab_destroy(&ctx->sample_pool);
- slab_destroy(&ctx->sample_period_pool);
+ slab_destroy(&ctx->sample_pool);
+ slab_destroy(&ctx->sample_period_pool);
}
#include "util/list.h"
-#include "freedreno_query.h"
#include "freedreno_context.h"
-
+#include "freedreno_query.h"
/*
* HW Queries:
*/
struct fd_hw_sample_provider {
- unsigned query_type;
-
- /* Set if the provider should still count while !ctx->active_queries */
- bool always;
-
- /* Optional hook for enabling a counter. Guaranteed to happen
- * at least once before the first ->get_sample() in a batch.
- */
- void (*enable)(struct fd_context *ctx, struct fd_ringbuffer *ring) dt;
-
- /* when a new sample is required, emit appropriate cmdstream
- * and return a sample object:
- */
- struct fd_hw_sample *(*get_sample)(struct fd_batch *batch,
- struct fd_ringbuffer *ring) dt;
-
- /* accumulate the results from specified sample period: */
- void (*accumulate_result)(struct fd_context *ctx,
- const void *start, const void *end,
- union pipe_query_result *result);
+ unsigned query_type;
+
+ /* Set if the provider should still count while !ctx->active_queries */
+ bool always;
+
+ /* Optional hook for enabling a counter. Guaranteed to happen
+ * at least once before the first ->get_sample() in a batch.
+ */
+ void (*enable)(struct fd_context *ctx, struct fd_ringbuffer *ring) dt;
+
+ /* when a new sample is required, emit appropriate cmdstream
+ * and return a sample object:
+ */
+ struct fd_hw_sample *(*get_sample)(struct fd_batch *batch,
+                                      struct fd_ringbuffer *ring) dt;
+
+ /* accumulate the results from specified sample period: */
+ void (*accumulate_result)(struct fd_context *ctx, const void *start,
+ const void *end, union pipe_query_result *result);
};
struct fd_hw_sample {
- struct pipe_reference reference; /* keep this first */
-
- /* offset and size of the sample are know at the time the
- * sample is constructed.
- */
- uint32_t size;
- uint32_t offset;
-
- /* backing object, offset/stride/etc are determined not when
- * the sample is constructed, but when the batch is submitted.
- * This way we can defer allocation until total # of requested
- * samples, and total # of tiles, is known.
- */
- struct pipe_resource *prsc;
- uint32_t num_tiles;
- uint32_t tile_stride;
+ struct pipe_reference reference; /* keep this first */
+
+   /* offset and size of the sample are known at the time
+ * sample is constructed.
+ */
+ uint32_t size;
+ uint32_t offset;
+
+ /* backing object, offset/stride/etc are determined not when
+ * the sample is constructed, but when the batch is submitted.
+ * This way we can defer allocation until total # of requested
+ * samples, and total # of tiles, is known.
+ */
+ struct pipe_resource *prsc;
+ uint32_t num_tiles;
+ uint32_t tile_stride;
};
struct fd_hw_sample_period;
struct fd_hw_query {
- struct fd_query base;
+ struct fd_query base;
- const struct fd_hw_sample_provider *provider;
+ const struct fd_hw_sample_provider *provider;
- /* list of fd_hw_sample_periods: */
- struct list_head periods;
+ /* list of fd_hw_sample_periods: */
+ struct list_head periods;
- /* if active and not paused, the current sample period (not
- * yet added to current_periods):
- */
- struct fd_hw_sample_period *period;
+ /* if active and not paused, the current sample period (not
+    * yet added to periods):
+ */
+ struct fd_hw_sample_period *period;
- struct list_head list; /* list-node in batch->active_queries */
+ struct list_head list; /* list-node in batch->active_queries */
- int no_wait_cnt; /* see fd_hw_get_query_result */
+ int no_wait_cnt; /* see fd_hw_get_query_result */
};
static inline struct fd_hw_query *
fd_hw_query(struct fd_query *q)
{
- return (struct fd_hw_query *)q;
+ return (struct fd_hw_query *)q;
}
-struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index);
+struct fd_query *fd_hw_create_query(struct fd_context *ctx, unsigned query_type,
+ unsigned index);
/* helper for sample providers: */
-struct fd_hw_sample * fd_hw_sample_init(struct fd_batch *batch, uint32_t size);
+struct fd_hw_sample *fd_hw_sample_init(struct fd_batch *batch, uint32_t size);
/* don't call directly, use fd_hw_sample_reference() */
void __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp);
void fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles) assert_dt;
void fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
- struct fd_ringbuffer *ring) assert_dt;
+ struct fd_ringbuffer *ring) assert_dt;
void fd_hw_query_update_batch(struct fd_batch *batch, bool end_batch) assert_dt;
-void fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;
-void fd_hw_query_register_provider(struct pipe_context *pctx,
- const struct fd_hw_sample_provider *provider);
+void fd_hw_query_enable(struct fd_batch *batch,
+ struct fd_ringbuffer *ring) assert_dt;
+void
+fd_hw_query_register_provider(struct pipe_context *pctx,
+ const struct fd_hw_sample_provider *provider);
void fd_hw_query_init(struct pipe_context *pctx);
void fd_hw_query_fini(struct pipe_context *pctx);
static inline void
-fd_hw_sample_reference(struct fd_context *ctx,
- struct fd_hw_sample **ptr, struct fd_hw_sample *samp)
+fd_hw_sample_reference(struct fd_context *ctx, struct fd_hw_sample **ptr,
+ struct fd_hw_sample *samp)
{
- struct fd_hw_sample *old_samp = *ptr;
+ struct fd_hw_sample *old_samp = *ptr;
- if (pipe_reference(&(*ptr)->reference, &samp->reference))
- __fd_hw_sample_destroy(ctx, old_samp);
- *ptr = samp;
+ if (pipe_reference(&(*ptr)->reference, &samp->reference))
+ __fd_hw_sample_destroy(ctx, old_samp);
+ *ptr = samp;
}
#endif /* FREEDRENO_QUERY_HW_H_ */
*/
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "util/os_time.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "freedreno_query_sw.h"
#include "freedreno_context.h"
+#include "freedreno_query_sw.h"
#include "freedreno_util.h"
/*
static void
fd_sw_destroy_query(struct fd_context *ctx, struct fd_query *q)
{
- struct fd_sw_query *sq = fd_sw_query(q);
- free(sq);
+ struct fd_sw_query *sq = fd_sw_query(q);
+ free(sq);
}
static uint64_t
-read_counter(struct fd_context *ctx, int type)
- assert_dt
+read_counter(struct fd_context *ctx, int type) assert_dt
{
- switch (type) {
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- return ctx->stats.prims_generated;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- return ctx->stats.prims_emitted;
- case FD_QUERY_DRAW_CALLS:
- return ctx->stats.draw_calls;
- case FD_QUERY_BATCH_TOTAL:
- return ctx->stats.batch_total;
- case FD_QUERY_BATCH_SYSMEM:
- return ctx->stats.batch_sysmem;
- case FD_QUERY_BATCH_GMEM:
- return ctx->stats.batch_gmem;
- case FD_QUERY_BATCH_NONDRAW:
- return ctx->stats.batch_nondraw;
- case FD_QUERY_BATCH_RESTORE:
- return ctx->stats.batch_restore;
- case FD_QUERY_STAGING_UPLOADS:
- return ctx->stats.staging_uploads;
- case FD_QUERY_SHADOW_UPLOADS:
- return ctx->stats.shadow_uploads;
- case FD_QUERY_VS_REGS:
- return ctx->stats.vs_regs;
- case FD_QUERY_FS_REGS:
- return ctx->stats.fs_regs;
- }
- return 0;
+ switch (type) {
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ return ctx->stats.prims_generated;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ return ctx->stats.prims_emitted;
+ case FD_QUERY_DRAW_CALLS:
+ return ctx->stats.draw_calls;
+ case FD_QUERY_BATCH_TOTAL:
+ return ctx->stats.batch_total;
+ case FD_QUERY_BATCH_SYSMEM:
+ return ctx->stats.batch_sysmem;
+ case FD_QUERY_BATCH_GMEM:
+ return ctx->stats.batch_gmem;
+ case FD_QUERY_BATCH_NONDRAW:
+ return ctx->stats.batch_nondraw;
+ case FD_QUERY_BATCH_RESTORE:
+ return ctx->stats.batch_restore;
+ case FD_QUERY_STAGING_UPLOADS:
+ return ctx->stats.staging_uploads;
+ case FD_QUERY_SHADOW_UPLOADS:
+ return ctx->stats.shadow_uploads;
+ case FD_QUERY_VS_REGS:
+ return ctx->stats.vs_regs;
+ case FD_QUERY_FS_REGS:
+ return ctx->stats.fs_regs;
+ }
+ return 0;
}
static bool
is_time_rate_query(struct fd_query *q)
{
- switch (q->type) {
- case FD_QUERY_BATCH_TOTAL:
- case FD_QUERY_BATCH_SYSMEM:
- case FD_QUERY_BATCH_GMEM:
- case FD_QUERY_BATCH_NONDRAW:
- case FD_QUERY_BATCH_RESTORE:
- case FD_QUERY_STAGING_UPLOADS:
- case FD_QUERY_SHADOW_UPLOADS:
- return true;
- default:
- return false;
- }
+ switch (q->type) {
+ case FD_QUERY_BATCH_TOTAL:
+ case FD_QUERY_BATCH_SYSMEM:
+ case FD_QUERY_BATCH_GMEM:
+ case FD_QUERY_BATCH_NONDRAW:
+ case FD_QUERY_BATCH_RESTORE:
+ case FD_QUERY_STAGING_UPLOADS:
+ case FD_QUERY_SHADOW_UPLOADS:
+ return true;
+ default:
+ return false;
+ }
}
static bool
is_draw_rate_query(struct fd_query *q)
{
- switch (q->type) {
- case FD_QUERY_VS_REGS:
- case FD_QUERY_FS_REGS:
- return true;
- default:
- return false;
- }
+ switch (q->type) {
+ case FD_QUERY_VS_REGS:
+ case FD_QUERY_FS_REGS:
+ return true;
+ default:
+ return false;
+ }
}
static void
-fd_sw_begin_query(struct fd_context *ctx, struct fd_query *q)
- assert_dt
+fd_sw_begin_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
- struct fd_sw_query *sq = fd_sw_query(q);
+ struct fd_sw_query *sq = fd_sw_query(q);
- ctx->stats_users++;
+ ctx->stats_users++;
- sq->begin_value = read_counter(ctx, q->type);
- if (is_time_rate_query(q)) {
- sq->begin_time = os_time_get();
- } else if (is_draw_rate_query(q)) {
- sq->begin_time = ctx->stats.draw_calls;
- }
+ sq->begin_value = read_counter(ctx, q->type);
+ if (is_time_rate_query(q)) {
+ sq->begin_time = os_time_get();
+ } else if (is_draw_rate_query(q)) {
+ sq->begin_time = ctx->stats.draw_calls;
+ }
}
static void
-fd_sw_end_query(struct fd_context *ctx, struct fd_query *q)
- assert_dt
+fd_sw_end_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
- struct fd_sw_query *sq = fd_sw_query(q);
+ struct fd_sw_query *sq = fd_sw_query(q);
- assert(ctx->stats_users > 0);
- ctx->stats_users--;
+ assert(ctx->stats_users > 0);
+ ctx->stats_users--;
- sq->end_value = read_counter(ctx, q->type);
- if (is_time_rate_query(q)) {
- sq->end_time = os_time_get();
- } else if (is_draw_rate_query(q)) {
- sq->end_time = ctx->stats.draw_calls;
- }
+ sq->end_value = read_counter(ctx, q->type);
+ if (is_time_rate_query(q)) {
+ sq->end_time = os_time_get();
+ } else if (is_draw_rate_query(q)) {
+ sq->end_time = ctx->stats.draw_calls;
+ }
}
static bool
-fd_sw_get_query_result(struct fd_context *ctx, struct fd_query *q,
- bool wait, union pipe_query_result *result)
+fd_sw_get_query_result(struct fd_context *ctx, struct fd_query *q, bool wait,
+ union pipe_query_result *result)
{
- struct fd_sw_query *sq = fd_sw_query(q);
+ struct fd_sw_query *sq = fd_sw_query(q);
- result->u64 = sq->end_value - sq->begin_value;
+ result->u64 = sq->end_value - sq->begin_value;
- if (is_time_rate_query(q)) {
- double fps = (result->u64 * 1000000) /
- (double)(sq->end_time - sq->begin_time);
- result->u64 = (uint64_t)fps;
- } else if (is_draw_rate_query(q)) {
- double avg = ((double)result->u64) /
- (double)(sq->end_time - sq->begin_time);
- result->f = avg;
- }
+ if (is_time_rate_query(q)) {
+ double fps =
+ (result->u64 * 1000000) / (double)(sq->end_time - sq->begin_time);
+ result->u64 = (uint64_t)fps;
+ } else if (is_draw_rate_query(q)) {
+ double avg =
+ ((double)result->u64) / (double)(sq->end_time - sq->begin_time);
+ result->f = avg;
+ }
- return true;
+ return true;
}
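As a concrete check of the rate math above: os_time_get() reports microseconds, so a counter that advances by 500 batches over an elapsed 2,000,000 us yields (500 * 1000000) / 2000000 = 250 in result->u64, i.e. 250 batches per second. For the draw-rate queries, begin_time/end_time hold draw-call counts instead, so a FD_QUERY_VS_REGS delta of 96 across 32 draws reports result->f = 3.0, the average per draw.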
static const struct fd_query_funcs sw_query_funcs = {
- .destroy_query = fd_sw_destroy_query,
- .begin_query = fd_sw_begin_query,
- .end_query = fd_sw_end_query,
- .get_query_result = fd_sw_get_query_result,
+ .destroy_query = fd_sw_destroy_query,
+ .begin_query = fd_sw_begin_query,
+ .end_query = fd_sw_end_query,
+ .get_query_result = fd_sw_get_query_result,
};
struct fd_query *
fd_sw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
{
- struct fd_sw_query *sq;
- struct fd_query *q;
-
- switch (query_type) {
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- case FD_QUERY_DRAW_CALLS:
- case FD_QUERY_BATCH_TOTAL:
- case FD_QUERY_BATCH_SYSMEM:
- case FD_QUERY_BATCH_GMEM:
- case FD_QUERY_BATCH_NONDRAW:
- case FD_QUERY_BATCH_RESTORE:
- case FD_QUERY_STAGING_UPLOADS:
- case FD_QUERY_SHADOW_UPLOADS:
- case FD_QUERY_VS_REGS:
- case FD_QUERY_FS_REGS:
- break;
- default:
- return NULL;
- }
-
- sq = CALLOC_STRUCT(fd_sw_query);
- if (!sq)
- return NULL;
-
- q = &sq->base;
- q->funcs = &sw_query_funcs;
- q->type = query_type;
-
- return q;
+ struct fd_sw_query *sq;
+ struct fd_query *q;
+
+ switch (query_type) {
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case FD_QUERY_DRAW_CALLS:
+ case FD_QUERY_BATCH_TOTAL:
+ case FD_QUERY_BATCH_SYSMEM:
+ case FD_QUERY_BATCH_GMEM:
+ case FD_QUERY_BATCH_NONDRAW:
+ case FD_QUERY_BATCH_RESTORE:
+ case FD_QUERY_STAGING_UPLOADS:
+ case FD_QUERY_SHADOW_UPLOADS:
+ case FD_QUERY_VS_REGS:
+ case FD_QUERY_FS_REGS:
+ break;
+ default:
+ return NULL;
+ }
+
+ sq = CALLOC_STRUCT(fd_sw_query);
+ if (!sq)
+ return NULL;
+
+ q = &sq->base;
+ q->funcs = &sw_query_funcs;
+ q->type = query_type;
+
+ return q;
}
*/
struct fd_sw_query {
- struct fd_query base;
- uint64_t begin_value, end_value;
- uint64_t begin_time, end_time;
+ struct fd_query base;
+ uint64_t begin_value, end_value;
+ uint64_t begin_time, end_time;
};
static inline struct fd_sw_query *
fd_sw_query(struct fd_query *q)
{
- return (struct fd_sw_query *)q;
+ return (struct fd_sw_query *)q;
}
-struct fd_query * fd_sw_create_query(struct fd_context *ctx,
- unsigned query_type, unsigned index);
+struct fd_query *fd_sw_create_query(struct fd_context *ctx, unsigned query_type,
+ unsigned index);
#endif /* FREEDRENO_QUERY_SW_H_ */
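Taken together with the sw_query_funcs table in freedreno_query_sw.c, the lifecycle is a plain vtable dispatch. A minimal sketch of a caller, assuming the common freedreno query code forwards through q->funcs as the table suggests (error handling omitted):

   struct fd_query *q = fd_sw_create_query(ctx, FD_QUERY_DRAW_CALLS, 0);
   if (q) {
      q->funcs->begin_query(ctx, q); /* snapshots ctx->stats.draw_calls */
      /* ... draws happen ... */
      q->funcs->end_query(ctx, q);

      union pipe_query_result result;
      q->funcs->get_query_result(ctx, q, true, &result);
      /* result.u64: draw calls issued between begin and end */

      q->funcs->destroy_query(ctx, q);
   }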
#include "util/format/u_format.h"
#include "util/format/u_format_rgtc.h"
#include "util/format/u_format_zs.h"
+#include "util/set.h"
+#include "util/u_drm.h"
#include "util/u_inlines.h"
-#include "util/u_transfer.h"
#include "util/u_string.h"
#include "util/u_surface.h"
-#include "util/set.h"
-#include "util/u_drm.h"
+#include "util/u_transfer.h"
#include "decode/util.h"
-#include "freedreno_resource.h"
#include "freedreno_batch_cache.h"
#include "freedreno_blitter.h"
+#include "freedreno_context.h"
#include "freedreno_fence.h"
+#include "freedreno_query_hw.h"
+#include "freedreno_resource.h"
#include "freedreno_screen.h"
#include "freedreno_surface.h"
-#include "freedreno_context.h"
-#include "freedreno_query_hw.h"
#include "freedreno_util.h"
-#include "drm-uapi/drm_fourcc.h"
#include <errno.h>
+#include "drm-uapi/drm_fourcc.h"
/* XXX this should go away, needed for 'struct winsys_handle' */
#include "frontend/drm_driver.h"
* the layout(s) of the tiled modes, and whether they are the same
* across generations.
*/
-#define FD_FORMAT_MOD_QCOM_TILED fourcc_mod_code(QCOM, 0xffffffff)
+#define FD_FORMAT_MOD_QCOM_TILED fourcc_mod_code(QCOM, 0xffffffff)
/**
* Go through the entire state and see if the resource is bound
* emitted so the GPU looks at the new backing bo.
*/
static void
-rebind_resource_in_ctx(struct fd_context *ctx, struct fd_resource *rsc)
- assert_dt
+rebind_resource_in_ctx(struct fd_context *ctx,
+ struct fd_resource *rsc) assert_dt
{
- struct pipe_resource *prsc = &rsc->b.b;
-
- if (ctx->rebind_resource)
- ctx->rebind_resource(ctx, rsc);
-
- /* VBOs */
- if (rsc->dirty & FD_DIRTY_VTXBUF) {
- struct fd_vertexbuf_stateobj *vb = &ctx->vtx.vertexbuf;
- for (unsigned i = 0; i < vb->count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
- if (vb->vb[i].buffer.resource == prsc)
- fd_context_dirty(ctx, FD_DIRTY_VTXBUF);
- }
- }
-
- const enum fd_dirty_3d_state per_stage_dirty =
- FD_DIRTY_CONST | FD_DIRTY_TEX | FD_DIRTY_IMAGE | FD_DIRTY_SSBO;
-
- if (!(rsc->dirty & per_stage_dirty))
- return;
-
- /* per-shader-stage resources: */
- for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
- /* Constbufs.. note that constbuf[0] is normal uniforms emitted in
- * cmdstream rather than by pointer..
- */
- if ((rsc->dirty & FD_DIRTY_CONST) &&
- !(ctx->dirty_shader[stage] & FD_DIRTY_CONST)) {
- struct fd_constbuf_stateobj *cb = &ctx->constbuf[stage];
- const unsigned num_ubos = util_last_bit(cb->enabled_mask);
- for (unsigned i = 1; i < num_ubos; i++) {
- if (cb->cb[i].buffer == prsc) {
- fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_CONST);
- break;
- }
- }
- }
-
- /* Textures */
- if ((rsc->dirty & FD_DIRTY_TEX) &&
- !(ctx->dirty_shader[stage] & FD_DIRTY_TEX)) {
- struct fd_texture_stateobj *tex = &ctx->tex[stage];
- for (unsigned i = 0; i < tex->num_textures; i++) {
- if (tex->textures[i] && (tex->textures[i]->texture == prsc)) {
- fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_TEX);
- break;
- }
- }
- }
-
- /* Images */
- if ((rsc->dirty & FD_DIRTY_IMAGE) &&
- !(ctx->dirty_shader[stage] & FD_DIRTY_IMAGE)) {
- struct fd_shaderimg_stateobj *si = &ctx->shaderimg[stage];
- const unsigned num_images = util_last_bit(si->enabled_mask);
- for (unsigned i = 0; i < num_images; i++) {
- if (si->si[i].resource == prsc) {
- fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_IMAGE);
- break;
- }
- }
- }
-
- /* SSBOs */
- if ((rsc->dirty & FD_DIRTY_SSBO) &&
- !(ctx->dirty_shader[stage] & FD_DIRTY_SSBO)) {
- struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[stage];
- const unsigned num_ssbos = util_last_bit(sb->enabled_mask);
- for (unsigned i = 0; i < num_ssbos; i++) {
- if (sb->sb[i].buffer == prsc) {
- fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_SSBO);
- break;
- }
- }
- }
- }
+ struct pipe_resource *prsc = &rsc->b.b;
+
+ if (ctx->rebind_resource)
+ ctx->rebind_resource(ctx, rsc);
+
+ /* VBOs */
+ if (rsc->dirty & FD_DIRTY_VTXBUF) {
+ struct fd_vertexbuf_stateobj *vb = &ctx->vtx.vertexbuf;
+ for (unsigned i = 0; i < vb->count && !(ctx->dirty & FD_DIRTY_VTXBUF);
+ i++) {
+ if (vb->vb[i].buffer.resource == prsc)
+ fd_context_dirty(ctx, FD_DIRTY_VTXBUF);
+ }
+ }
+
+ const enum fd_dirty_3d_state per_stage_dirty =
+ FD_DIRTY_CONST | FD_DIRTY_TEX | FD_DIRTY_IMAGE | FD_DIRTY_SSBO;
+
+ if (!(rsc->dirty & per_stage_dirty))
+ return;
+
+ /* per-shader-stage resources: */
+ for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
+ /* Constbufs.. note that constbuf[0] is normal uniforms emitted in
+ * cmdstream rather than by pointer..
+ */
+ if ((rsc->dirty & FD_DIRTY_CONST) &&
+ !(ctx->dirty_shader[stage] & FD_DIRTY_CONST)) {
+ struct fd_constbuf_stateobj *cb = &ctx->constbuf[stage];
+ const unsigned num_ubos = util_last_bit(cb->enabled_mask);
+ for (unsigned i = 1; i < num_ubos; i++) {
+ if (cb->cb[i].buffer == prsc) {
+ fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_CONST);
+ break;
+ }
+ }
+ }
+
+ /* Textures */
+ if ((rsc->dirty & FD_DIRTY_TEX) &&
+ !(ctx->dirty_shader[stage] & FD_DIRTY_TEX)) {
+ struct fd_texture_stateobj *tex = &ctx->tex[stage];
+ for (unsigned i = 0; i < tex->num_textures; i++) {
+ if (tex->textures[i] && (tex->textures[i]->texture == prsc)) {
+ fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_TEX);
+ break;
+ }
+ }
+ }
+
+ /* Images */
+ if ((rsc->dirty & FD_DIRTY_IMAGE) &&
+ !(ctx->dirty_shader[stage] & FD_DIRTY_IMAGE)) {
+ struct fd_shaderimg_stateobj *si = &ctx->shaderimg[stage];
+ const unsigned num_images = util_last_bit(si->enabled_mask);
+ for (unsigned i = 0; i < num_images; i++) {
+ if (si->si[i].resource == prsc) {
+ fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_IMAGE);
+ break;
+ }
+ }
+ }
+
+ /* SSBOs */
+ if ((rsc->dirty & FD_DIRTY_SSBO) &&
+ !(ctx->dirty_shader[stage] & FD_DIRTY_SSBO)) {
+ struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[stage];
+ const unsigned num_ssbos = util_last_bit(sb->enabled_mask);
+ for (unsigned i = 0; i < num_ssbos; i++) {
+ if (sb->sb[i].buffer == prsc) {
+ fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_SSBO);
+ break;
+ }
+ }
+ }
+ }
}
static void
-rebind_resource(struct fd_resource *rsc)
- assert_dt
+rebind_resource(struct fd_resource *rsc) assert_dt
{
- struct fd_screen *screen = fd_screen(rsc->b.b.screen);
+ struct fd_screen *screen = fd_screen(rsc->b.b.screen);
- fd_screen_lock(screen);
- fd_resource_lock(rsc);
+ fd_screen_lock(screen);
+ fd_resource_lock(rsc);
- if (rsc->dirty)
- list_for_each_entry (struct fd_context, ctx, &screen->context_list, node)
- rebind_resource_in_ctx(ctx, rsc);
+ if (rsc->dirty)
+ list_for_each_entry (struct fd_context, ctx, &screen->context_list, node)
+ rebind_resource_in_ctx(ctx, rsc);
- fd_resource_unlock(rsc);
- fd_screen_unlock(screen);
+ fd_resource_unlock(rsc);
+ fd_screen_unlock(screen);
}
static inline void
fd_resource_set_bo(struct fd_resource *rsc, struct fd_bo *bo)
{
- struct fd_screen *screen = fd_screen(rsc->b.b.screen);
+ struct fd_screen *screen = fd_screen(rsc->b.b.screen);
- rsc->bo = bo;
- rsc->seqno = p_atomic_inc_return(&screen->rsc_seqno);
+ rsc->bo = bo;
+ rsc->seqno = p_atomic_inc_return(&screen->rsc_seqno);
}
int
-__fd_resource_wait(struct fd_context *ctx, struct fd_resource *rsc,
- unsigned op, const char *func)
+__fd_resource_wait(struct fd_context *ctx, struct fd_resource *rsc, unsigned op,
+ const char *func)
{
- if (op & DRM_FREEDRENO_PREP_NOSYNC)
- return fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);
+ if (op & DRM_FREEDRENO_PREP_NOSYNC)
+ return fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);
- int ret;
+ int ret;
- perf_time_ctx(ctx, 10000, "%s: a busy \"%"PRSC_FMT"\" BO stalled",
- func, PRSC_ARGS(&rsc->b.b)) {
- ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);
- }
+ perf_time_ctx(ctx, 10000, "%s: a busy \"%" PRSC_FMT "\" BO stalled", func,
+ PRSC_ARGS(&rsc->b.b))
+ {
+ ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);
+ }
- return ret;
+ return ret;
}
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
- struct pipe_resource *prsc = &rsc->b.b;
- struct fd_screen *screen = fd_screen(rsc->b.b.screen);
- uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
- DRM_FREEDRENO_GEM_TYPE_KMEM |
- COND(prsc->bind & PIPE_BIND_SCANOUT, DRM_FREEDRENO_GEM_SCANOUT);
- /* TODO other flags? */
-
- /* if we start using things other than write-combine,
- * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
- */
-
- if (rsc->bo)
- fd_bo_del(rsc->bo);
-
- struct fd_bo *bo = fd_bo_new(screen->dev, size, flags, "%ux%ux%u@%u:%x",
- prsc->width0, prsc->height0, prsc->depth0, rsc->layout.cpp, prsc->bind);
- fd_resource_set_bo(rsc, bo);
-
- /* Zero out the UBWC area on allocation. This fixes intermittent failures
- * with UBWC, which I suspect are due to the HW having a hard time
- * interpreting arbitrary values populating the flags buffer when the BO
- * was recycled through the bo cache (instead of fresh allocations from
- * the kernel, which are zeroed). sleep(1) in this spot didn't work
- * around the issue, but any memset value seems to.
- */
- if (rsc->layout.ubwc) {
- rsc->needs_ubwc_clear = true;
- }
-
- util_range_set_empty(&rsc->valid_buffer_range);
- fd_bc_invalidate_resource(rsc, true);
+ struct pipe_resource *prsc = &rsc->b.b;
+ struct fd_screen *screen = fd_screen(rsc->b.b.screen);
+ uint32_t flags =
+ DRM_FREEDRENO_GEM_CACHE_WCOMBINE | DRM_FREEDRENO_GEM_TYPE_KMEM |
+ COND(prsc->bind & PIPE_BIND_SCANOUT, DRM_FREEDRENO_GEM_SCANOUT);
+ /* TODO other flags? */
+
+ /* if we start using things other than write-combine,
+ * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
+ */
+
+ if (rsc->bo)
+ fd_bo_del(rsc->bo);
+
+ struct fd_bo *bo =
+ fd_bo_new(screen->dev, size, flags, "%ux%ux%u@%u:%x", prsc->width0,
+ prsc->height0, prsc->depth0, rsc->layout.cpp, prsc->bind);
+ fd_resource_set_bo(rsc, bo);
+
+ /* Zero out the UBWC area on allocation. This fixes intermittent failures
+ * with UBWC, which I suspect are due to the HW having a hard time
+ * interpreting arbitrary values populating the flags buffer when the BO
+ * was recycled through the bo cache (instead of fresh allocations from
+ * the kernel, which are zeroed). sleep(1) in this spot didn't work
+ * around the issue, but any memset value seems to.
+ */
+ if (rsc->layout.ubwc) {
+ rsc->needs_ubwc_clear = true;
+ }
+
+ util_range_set_empty(&rsc->valid_buffer_range);
+ fd_bc_invalidate_resource(rsc, true);
}
static void
-do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
- assert_dt
+do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit,
+ bool fallback) assert_dt
{
- struct pipe_context *pctx = &ctx->base;
-
- /* TODO size threshold too?? */
- if (fallback || !fd_blit(pctx, blit)) {
- /* do blit on cpu: */
- util_resource_copy_region(pctx,
- blit->dst.resource, blit->dst.level, blit->dst.box.x,
- blit->dst.box.y, blit->dst.box.z,
- blit->src.resource, blit->src.level, &blit->src.box);
- }
+ struct pipe_context *pctx = &ctx->base;
+
+ /* TODO size threshold too?? */
+ if (fallback || !fd_blit(pctx, blit)) {
+ /* do blit on cpu: */
+ util_resource_copy_region(pctx, blit->dst.resource, blit->dst.level,
+ blit->dst.box.x, blit->dst.box.y,
+ blit->dst.box.z, blit->src.resource,
+ blit->src.level, &blit->src.box);
+ }
}
/**
*/
void
fd_replace_buffer_storage(struct pipe_context *pctx, struct pipe_resource *pdst,
- struct pipe_resource *psrc)
+ struct pipe_resource *psrc)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_resource *dst = fd_resource(pdst);
- struct fd_resource *src = fd_resource(psrc);
-
- DBG("pdst=%p, psrc=%p", pdst, psrc);
-
- /* This should only be called with buffers.. which side-steps some tricker
- * cases, like a rsc that is in a batch-cache key...
- */
- assert(pdst->target == PIPE_BUFFER);
- assert(psrc->target == PIPE_BUFFER);
- assert(dst->track->bc_batch_mask == 0);
- assert(src->track->bc_batch_mask == 0);
- assert(src->track->batch_mask == 0);
- assert(src->track->write_batch == NULL);
- assert(memcmp(&dst->layout, &src->layout, sizeof(dst->layout)) == 0);
-
- /* get rid of any references that batch-cache might have to us (which
- * should empty/destroy rsc->batches hashset)
- *
- * Note that we aren't actually destroying dst, but we are replacing
- * it's storage so we want to go thru the same motions of decoupling
- * it's batch connections.
- */
- fd_bc_invalidate_resource(dst, true);
- rebind_resource(dst);
-
- fd_screen_lock(ctx->screen);
-
- fd_bo_del(dst->bo);
- dst->bo = fd_bo_ref(src->bo);
-
- fd_resource_tracking_reference(&dst->track, src->track);
- src->is_replacement = true;
-
- dst->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno);
-
- fd_screen_unlock(ctx->screen);
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_resource *dst = fd_resource(pdst);
+ struct fd_resource *src = fd_resource(psrc);
+
+ DBG("pdst=%p, psrc=%p", pdst, psrc);
+
+   /* This should only be called with buffers.. which side-steps some trickier
+ * cases, like a rsc that is in a batch-cache key...
+ */
+ assert(pdst->target == PIPE_BUFFER);
+ assert(psrc->target == PIPE_BUFFER);
+ assert(dst->track->bc_batch_mask == 0);
+ assert(src->track->bc_batch_mask == 0);
+ assert(src->track->batch_mask == 0);
+ assert(src->track->write_batch == NULL);
+ assert(memcmp(&dst->layout, &src->layout, sizeof(dst->layout)) == 0);
+
+ /* get rid of any references that batch-cache might have to us (which
+ * should empty/destroy rsc->batches hashset)
+ *
+ * Note that we aren't actually destroying dst, but we are replacing
+    * its storage so we want to go thru the same motions of decoupling
+    * its batch connections.
+ */
+ fd_bc_invalidate_resource(dst, true);
+ rebind_resource(dst);
+
+ fd_screen_lock(ctx->screen);
+
+ fd_bo_del(dst->bo);
+ dst->bo = fd_bo_ref(src->bo);
+
+ fd_resource_tracking_reference(&dst->track, src->track);
+ src->is_replacement = true;
+
+ dst->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno);
+
+ fd_screen_unlock(ctx->screen);
}
-static void
-flush_resource(struct fd_context *ctx, struct fd_resource *rsc, unsigned usage);
+static void flush_resource(struct fd_context *ctx, struct fd_resource *rsc,
+ unsigned usage);
/**
* @rsc: the resource to shadow
*/
static bool
fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
- unsigned level, const struct pipe_box *box, uint64_t modifier)
- assert_dt
+ unsigned level, const struct pipe_box *box,
+ uint64_t modifier) assert_dt
{
- struct pipe_context *pctx = &ctx->base;
- struct pipe_resource *prsc = &rsc->b.b;
- bool fallback = false;
-
- if (prsc->next)
- return false;
-
- /* If you have a sequence where there is a single rsc associated
- * with the current render target, and then you end up shadowing
- * that same rsc on the 3d pipe (u_blitter), because of how we
- * swap the new shadow and rsc before the back-blit, you could end
- * up confusing things into thinking that u_blitter's framebuffer
- * state is the same as the current framebuffer state, which has
- * the result of blitting to rsc rather than shadow.
- *
- * Normally we wouldn't want to unconditionally trigger a flush,
- * since that defeats the purpose of shadowing, but this is a
- * case where we'd have to flush anyways.
- */
- if (rsc->track->write_batch == ctx->batch)
- flush_resource(ctx, rsc, 0);
-
- /* TODO: somehow munge dimensions and format to copy unsupported
- * render target format to something that is supported?
- */
- if (!pctx->screen->is_format_supported(pctx->screen,
- prsc->format, prsc->target, prsc->nr_samples,
- prsc->nr_storage_samples,
- PIPE_BIND_RENDER_TARGET))
- fallback = true;
-
- /* do shadowing back-blits on the cpu for buffers: */
- if (prsc->target == PIPE_BUFFER)
- fallback = true;
-
- bool discard_whole_level = box && util_texrange_covers_whole_level(prsc, level,
- box->x, box->y, box->z, box->width, box->height, box->depth);
-
- /* TODO need to be more clever about current level */
- if ((prsc->target >= PIPE_TEXTURE_2D) && box && !discard_whole_level)
- return false;
-
- struct pipe_resource *pshadow =
- pctx->screen->resource_create_with_modifiers(pctx->screen,
- prsc, &modifier, 1);
-
- if (!pshadow)
- return false;
-
- assert(!ctx->in_shadow);
- ctx->in_shadow = true;
-
- /* get rid of any references that batch-cache might have to us (which
- * should empty/destroy rsc->batches hashset)
- */
- fd_bc_invalidate_resource(rsc, false);
- rebind_resource(rsc);
-
- fd_screen_lock(ctx->screen);
-
- /* Swap the backing bo's, so shadow becomes the old buffer,
- * blit from shadow to new buffer. From here on out, we
- * cannot fail.
- *
- * Note that we need to do it in this order, otherwise if
- * we go down cpu blit path, the recursive transfer_map()
- * sees the wrong status..
- */
- struct fd_resource *shadow = fd_resource(pshadow);
-
- DBG("shadow: %p (%d, %p) -> %p (%d, %p)", rsc, rsc->b.b.reference.count, rsc->track,
- shadow, shadow->b.b.reference.count, shadow->track);
-
- /* TODO valid_buffer_range?? */
- swap(rsc->bo, shadow->bo);
- swap(rsc->layout, shadow->layout);
- rsc->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno);
-
- /* at this point, the newly created shadow buffer is not referenced
- * by any batches, but the existing rsc (probably) is. We need to
- * transfer those references over:
- */
- debug_assert(shadow->track->batch_mask == 0);
- struct fd_batch *batch;
- foreach_batch (batch, &ctx->screen->batch_cache, rsc->track->batch_mask) {
- struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
- _mesa_set_remove(batch->resources, entry);
- _mesa_set_add(batch->resources, shadow);
- }
- swap(rsc->track, shadow->track);
-
- fd_screen_unlock(ctx->screen);
-
- struct pipe_blit_info blit = {};
- blit.dst.resource = prsc;
- blit.dst.format = prsc->format;
- blit.src.resource = pshadow;
- blit.src.format = pshadow->format;
- blit.mask = util_format_get_mask(prsc->format);
- blit.filter = PIPE_TEX_FILTER_NEAREST;
-
-#define set_box(field, val) do { \
- blit.dst.field = (val); \
- blit.src.field = (val); \
- } while (0)
-
- /* Disable occlusion queries during shadow blits. */
- bool saved_active_queries = ctx->active_queries;
- pctx->set_active_query_state(pctx, false);
-
- /* blit the other levels in their entirety: */
- for (unsigned l = 0; l <= prsc->last_level; l++) {
- if (box && l == level)
- continue;
-
- /* just blit whole level: */
- set_box(level, l);
- set_box(box.width, u_minify(prsc->width0, l));
- set_box(box.height, u_minify(prsc->height0, l));
- set_box(box.depth, u_minify(prsc->depth0, l));
-
- for (int i = 0; i < prsc->array_size; i++) {
- set_box(box.z, i);
- do_blit(ctx, &blit, fallback);
- }
- }
-
- /* deal w/ current level specially, since we might need to split
- * it up into a couple blits:
- */
- if (box && !discard_whole_level) {
- set_box(level, level);
-
- switch (prsc->target) {
- case PIPE_BUFFER:
- case PIPE_TEXTURE_1D:
- set_box(box.y, 0);
- set_box(box.z, 0);
- set_box(box.height, 1);
- set_box(box.depth, 1);
-
- if (box->x > 0) {
- set_box(box.x, 0);
- set_box(box.width, box->x);
-
- do_blit(ctx, &blit, fallback);
- }
- if ((box->x + box->width) < u_minify(prsc->width0, level)) {
- set_box(box.x, box->x + box->width);
- set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));
-
- do_blit(ctx, &blit, fallback);
- }
- break;
- case PIPE_TEXTURE_2D:
- /* TODO */
- default:
- unreachable("TODO");
- }
- }
-
- pctx->set_active_query_state(pctx, saved_active_queries);
-
- ctx->in_shadow = false;
-
- pipe_resource_reference(&pshadow, NULL);
-
- return true;
+ struct pipe_context *pctx = &ctx->base;
+ struct pipe_resource *prsc = &rsc->b.b;
+ bool fallback = false;
+
+ if (prsc->next)
+ return false;
+
+ /* If you have a sequence where there is a single rsc associated
+ * with the current render target, and then you end up shadowing
+ * that same rsc on the 3d pipe (u_blitter), because of how we
+ * swap the new shadow and rsc before the back-blit, you could end
+ * up confusing things into thinking that u_blitter's framebuffer
+ * state is the same as the current framebuffer state, which has
+ * the result of blitting to rsc rather than shadow.
+ *
+ * Normally we wouldn't want to unconditionally trigger a flush,
+ * since that defeats the purpose of shadowing, but this is a
+ * case where we'd have to flush anyways.
+ */
+ if (rsc->track->write_batch == ctx->batch)
+ flush_resource(ctx, rsc, 0);
+
+ /* TODO: somehow munge dimensions and format to copy unsupported
+ * render target format to something that is supported?
+ */
+ if (!pctx->screen->is_format_supported(
+ pctx->screen, prsc->format, prsc->target, prsc->nr_samples,
+ prsc->nr_storage_samples, PIPE_BIND_RENDER_TARGET))
+ fallback = true;
+
+ /* do shadowing back-blits on the cpu for buffers: */
+ if (prsc->target == PIPE_BUFFER)
+ fallback = true;
+
+ bool discard_whole_level = box && util_texrange_covers_whole_level(
+ prsc, level, box->x, box->y, box->z,
+ box->width, box->height, box->depth);
+
+ /* TODO need to be more clever about current level */
+ if ((prsc->target >= PIPE_TEXTURE_2D) && box && !discard_whole_level)
+ return false;
+
+ struct pipe_resource *pshadow = pctx->screen->resource_create_with_modifiers(
+ pctx->screen, prsc, &modifier, 1);
+
+ if (!pshadow)
+ return false;
+
+ assert(!ctx->in_shadow);
+ ctx->in_shadow = true;
+
+ /* get rid of any references that batch-cache might have to us (which
+ * should empty/destroy rsc->batches hashset)
+ */
+ fd_bc_invalidate_resource(rsc, false);
+ rebind_resource(rsc);
+
+ fd_screen_lock(ctx->screen);
+
+ /* Swap the backing bo's, so shadow becomes the old buffer,
+ * blit from shadow to new buffer. From here on out, we
+ * cannot fail.
+ *
+ * Note that we need to do it in this order, otherwise if
+    * we go down the cpu blit path, the recursive transfer_map()
+ * sees the wrong status..
+ */
+ struct fd_resource *shadow = fd_resource(pshadow);
+
+ DBG("shadow: %p (%d, %p) -> %p (%d, %p)", rsc, rsc->b.b.reference.count,
+ rsc->track, shadow, shadow->b.b.reference.count, shadow->track);
+
+ /* TODO valid_buffer_range?? */
+ swap(rsc->bo, shadow->bo);
+ swap(rsc->layout, shadow->layout);
+ rsc->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno);
+
+ /* at this point, the newly created shadow buffer is not referenced
+ * by any batches, but the existing rsc (probably) is. We need to
+ * transfer those references over:
+ */
+ debug_assert(shadow->track->batch_mask == 0);
+ struct fd_batch *batch;
+ foreach_batch(batch, &ctx->screen->batch_cache, rsc->track->batch_mask)
+ {
+ struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
+ _mesa_set_remove(batch->resources, entry);
+ _mesa_set_add(batch->resources, shadow);
+ }
+ swap(rsc->track, shadow->track);
+
+ fd_screen_unlock(ctx->screen);
+
+ struct pipe_blit_info blit = {};
+ blit.dst.resource = prsc;
+ blit.dst.format = prsc->format;
+ blit.src.resource = pshadow;
+ blit.src.format = pshadow->format;
+ blit.mask = util_format_get_mask(prsc->format);
+ blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+#define set_box(field, val) \
+ do { \
+ blit.dst.field = (val); \
+ blit.src.field = (val); \
+ } while (0)
+
+ /* Disable occlusion queries during shadow blits. */
+ bool saved_active_queries = ctx->active_queries;
+ pctx->set_active_query_state(pctx, false);
+
+ /* blit the other levels in their entirety: */
+ for (unsigned l = 0; l <= prsc->last_level; l++) {
+ if (box && l == level)
+ continue;
+
+ /* just blit whole level: */
+ set_box(level, l);
+ set_box(box.width, u_minify(prsc->width0, l));
+ set_box(box.height, u_minify(prsc->height0, l));
+ set_box(box.depth, u_minify(prsc->depth0, l));
+
+ for (int i = 0; i < prsc->array_size; i++) {
+ set_box(box.z, i);
+ do_blit(ctx, &blit, fallback);
+ }
+ }
+
+ /* deal w/ current level specially, since we might need to split
+ * it up into a couple blits:
+ */
+ if (box && !discard_whole_level) {
+ set_box(level, level);
+
+ switch (prsc->target) {
+ case PIPE_BUFFER:
+ case PIPE_TEXTURE_1D:
+ set_box(box.y, 0);
+ set_box(box.z, 0);
+ set_box(box.height, 1);
+ set_box(box.depth, 1);
+
+ if (box->x > 0) {
+ set_box(box.x, 0);
+ set_box(box.width, box->x);
+
+ do_blit(ctx, &blit, fallback);
+ }
+ if ((box->x + box->width) < u_minify(prsc->width0, level)) {
+ set_box(box.x, box->x + box->width);
+ set_box(box.width,
+ u_minify(prsc->width0, level) - (box->x + box->width));
+
+ do_blit(ctx, &blit, fallback);
+ }
+ break;
+ case PIPE_TEXTURE_2D:
+ /* TODO */
+ default:
+ unreachable("TODO");
+ }
+ }
+
+ pctx->set_active_query_state(pctx, saved_active_queries);
+
+ ctx->in_shadow = false;
+
+ pipe_resource_reference(&pshadow, NULL);
+
+ return true;
}
/**
void
fd_resource_uncompress(struct fd_context *ctx, struct fd_resource *rsc)
{
- tc_assert_driver_thread(ctx->tc);
+ tc_assert_driver_thread(ctx->tc);
- bool success =
- fd_try_shadow_resource(ctx, rsc, 0, NULL, FD_FORMAT_MOD_QCOM_TILED);
+ bool success =
+ fd_try_shadow_resource(ctx, rsc, 0, NULL, FD_FORMAT_MOD_QCOM_TILED);
- /* shadow should not fail in any cases where we need to uncompress: */
- debug_assert(success);
+ /* shadow should not fail in any cases where we need to uncompress: */
+ debug_assert(success);
}
/**
void
fd_resource_dump(struct fd_resource *rsc, const char *name)
{
- fd_bo_cpu_prep(rsc->bo, NULL, DRM_FREEDRENO_PREP_READ);
- printf("%s: \n", name);
- dump_hex(fd_bo_map(rsc->bo), fd_bo_size(rsc->bo));
+ fd_bo_cpu_prep(rsc->bo, NULL, DRM_FREEDRENO_PREP_READ);
+ printf("%s: \n", name);
+ dump_hex(fd_bo_map(rsc->bo), fd_bo_size(rsc->bo));
}
static struct fd_resource *
fd_alloc_staging(struct fd_context *ctx, struct fd_resource *rsc,
- unsigned level, const struct pipe_box *box)
+ unsigned level, const struct pipe_box *box)
{
- struct pipe_context *pctx = &ctx->base;
- struct pipe_resource tmpl = rsc->b.b;
-
- tmpl.width0 = box->width;
- tmpl.height0 = box->height;
- /* for array textures, box->depth is the array_size, otherwise
- * for 3d textures, it is the depth:
- */
- if (tmpl.array_size > 1) {
- if (tmpl.target == PIPE_TEXTURE_CUBE)
- tmpl.target = PIPE_TEXTURE_2D_ARRAY;
- tmpl.array_size = box->depth;
- tmpl.depth0 = 1;
- } else {
- tmpl.array_size = 1;
- tmpl.depth0 = box->depth;
- }
- tmpl.last_level = 0;
- tmpl.bind |= PIPE_BIND_LINEAR;
- tmpl.usage = PIPE_USAGE_STAGING;
-
- struct pipe_resource *pstaging =
- pctx->screen->resource_create(pctx->screen, &tmpl);
- if (!pstaging)
- return NULL;
-
- return fd_resource(pstaging);
+ struct pipe_context *pctx = &ctx->base;
+ struct pipe_resource tmpl = rsc->b.b;
+
+ tmpl.width0 = box->width;
+ tmpl.height0 = box->height;
+ /* for array textures, box->depth is the array_size, otherwise
+ * for 3d textures, it is the depth:
+ */
+ if (tmpl.array_size > 1) {
+ if (tmpl.target == PIPE_TEXTURE_CUBE)
+ tmpl.target = PIPE_TEXTURE_2D_ARRAY;
+ tmpl.array_size = box->depth;
+ tmpl.depth0 = 1;
+ } else {
+ tmpl.array_size = 1;
+ tmpl.depth0 = box->depth;
+ }
+ tmpl.last_level = 0;
+ tmpl.bind |= PIPE_BIND_LINEAR;
+ tmpl.usage = PIPE_USAGE_STAGING;
+
+ struct pipe_resource *pstaging =
+ pctx->screen->resource_create(pctx->screen, &tmpl);
+ if (!pstaging)
+ return NULL;
+
+ return fd_resource(pstaging);
}
static void
-fd_blit_from_staging(struct fd_context *ctx, struct fd_transfer *trans)
- assert_dt
+fd_blit_from_staging(struct fd_context *ctx,
+ struct fd_transfer *trans) assert_dt
{
- struct pipe_resource *dst = trans->b.b.resource;
- struct pipe_blit_info blit = {};
-
- blit.dst.resource = dst;
- blit.dst.format = dst->format;
- blit.dst.level = trans->b.b.level;
- blit.dst.box = trans->b.b.box;
- blit.src.resource = trans->staging_prsc;
- blit.src.format = trans->staging_prsc->format;
- blit.src.level = 0;
- blit.src.box = trans->staging_box;
- blit.mask = util_format_get_mask(trans->staging_prsc->format);
- blit.filter = PIPE_TEX_FILTER_NEAREST;
-
- do_blit(ctx, &blit, false);
+ struct pipe_resource *dst = trans->b.b.resource;
+ struct pipe_blit_info blit = {};
+
+ blit.dst.resource = dst;
+ blit.dst.format = dst->format;
+ blit.dst.level = trans->b.b.level;
+ blit.dst.box = trans->b.b.box;
+ blit.src.resource = trans->staging_prsc;
+ blit.src.format = trans->staging_prsc->format;
+ blit.src.level = 0;
+ blit.src.box = trans->staging_box;
+ blit.mask = util_format_get_mask(trans->staging_prsc->format);
+ blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+ do_blit(ctx, &blit, false);
}
static void
-fd_blit_to_staging(struct fd_context *ctx, struct fd_transfer *trans)
- assert_dt
+fd_blit_to_staging(struct fd_context *ctx, struct fd_transfer *trans) assert_dt
{
- struct pipe_resource *src = trans->b.b.resource;
- struct pipe_blit_info blit = {};
-
- blit.src.resource = src;
- blit.src.format = src->format;
- blit.src.level = trans->b.b.level;
- blit.src.box = trans->b.b.box;
- blit.dst.resource = trans->staging_prsc;
- blit.dst.format = trans->staging_prsc->format;
- blit.dst.level = 0;
- blit.dst.box = trans->staging_box;
- blit.mask = util_format_get_mask(trans->staging_prsc->format);
- blit.filter = PIPE_TEX_FILTER_NEAREST;
-
- do_blit(ctx, &blit, false);
+ struct pipe_resource *src = trans->b.b.resource;
+ struct pipe_blit_info blit = {};
+
+ blit.src.resource = src;
+ blit.src.format = src->format;
+ blit.src.level = trans->b.b.level;
+ blit.src.box = trans->b.b.box;
+ blit.dst.resource = trans->staging_prsc;
+ blit.dst.format = trans->staging_prsc->format;
+ blit.dst.level = 0;
+ blit.dst.box = trans->staging_box;
+ blit.mask = util_format_get_mask(trans->staging_prsc->format);
+ blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+ do_blit(ctx, &blit, false);
}
-static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
- struct pipe_transfer *ptrans,
- const struct pipe_box *box)
+static void
+fd_resource_transfer_flush_region(struct pipe_context *pctx,
+ struct pipe_transfer *ptrans,
+ const struct pipe_box *box)
{
- struct fd_resource *rsc = fd_resource(ptrans->resource);
+ struct fd_resource *rsc = fd_resource(ptrans->resource);
- if (ptrans->resource->target == PIPE_BUFFER)
- util_range_add(&rsc->b.b, &rsc->valid_buffer_range,
- ptrans->box.x + box->x,
- ptrans->box.x + box->x + box->width);
+ if (ptrans->resource->target == PIPE_BUFFER)
+ util_range_add(&rsc->b.b, &rsc->valid_buffer_range,
+ ptrans->box.x + box->x,
+ ptrans->box.x + box->x + box->width);
}
static void
-flush_resource(struct fd_context *ctx, struct fd_resource *rsc, unsigned usage)
- assert_dt
+flush_resource(struct fd_context *ctx, struct fd_resource *rsc,
+ unsigned usage) assert_dt
{
- struct fd_batch *write_batch = NULL;
-
- fd_screen_lock(ctx->screen);
- fd_batch_reference_locked(&write_batch, rsc->track->write_batch);
- fd_screen_unlock(ctx->screen);
-
- if (usage & PIPE_MAP_WRITE) {
- struct fd_batch *batch, *batches[32] = {};
- uint32_t batch_mask;
-
- /* This is a bit awkward, probably a fd_batch_flush_locked()
- * would make things simpler.. but we need to hold the lock
- * to iterate the batches which reference this resource. So
- * we must first grab references under a lock, then flush.
- */
- fd_screen_lock(ctx->screen);
- batch_mask = rsc->track->batch_mask;
- foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
- fd_batch_reference_locked(&batches[batch->idx], batch);
- fd_screen_unlock(ctx->screen);
-
- foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
- fd_batch_flush(batch);
-
- foreach_batch(batch, &ctx->screen->batch_cache, batch_mask) {
- fd_batch_reference(&batches[batch->idx], NULL);
- }
- assert(rsc->track->batch_mask == 0);
- } else if (write_batch) {
- fd_batch_flush(write_batch);
- }
-
- fd_batch_reference(&write_batch, NULL);
-
- assert(!rsc->track->write_batch);
+ struct fd_batch *write_batch = NULL;
+
+ fd_screen_lock(ctx->screen);
+ fd_batch_reference_locked(&write_batch, rsc->track->write_batch);
+ fd_screen_unlock(ctx->screen);
+
+ if (usage & PIPE_MAP_WRITE) {
+ struct fd_batch *batch, *batches[32] = {};
+ uint32_t batch_mask;
+
+ /* This is a bit awkward, probably a fd_batch_flush_locked()
+ * would make things simpler.. but we need to hold the lock
+ * to iterate the batches which reference this resource. So
+ * we must first grab references under a lock, then flush.
+ */
+ fd_screen_lock(ctx->screen);
+ batch_mask = rsc->track->batch_mask;
+ foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
+ fd_batch_reference_locked(&batches[batch->idx], batch);
+ fd_screen_unlock(ctx->screen);
+
+ foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
+ fd_batch_flush(batch);
+
+ foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
+ {
+ fd_batch_reference(&batches[batch->idx], NULL);
+ }
+ assert(rsc->track->batch_mask == 0);
+ } else if (write_batch) {
+ fd_batch_flush(write_batch);
+ }
+
+ fd_batch_reference(&write_batch, NULL);
+
+ assert(!rsc->track->write_batch);
}
static void
-fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
- in_dt
+fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc) in_dt
{
- flush_resource(fd_context(pctx), fd_resource(prsc), PIPE_MAP_READ);
+ flush_resource(fd_context(pctx), fd_resource(prsc), PIPE_MAP_READ);
}
static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
- struct pipe_transfer *ptrans)
- in_dt /* TODO for threaded-ctx we'll need to split out unsynchronized path */
+ struct pipe_transfer *ptrans)
+ in_dt /* TODO for threaded-ctx we'll need to split out unsynchronized path */
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_resource *rsc = fd_resource(ptrans->resource);
- struct fd_transfer *trans = fd_transfer(ptrans);
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_resource *rsc = fd_resource(ptrans->resource);
+ struct fd_transfer *trans = fd_transfer(ptrans);
- if (trans->staging_prsc) {
- if (ptrans->usage & PIPE_MAP_WRITE)
- fd_blit_from_staging(ctx, trans);
- pipe_resource_reference(&trans->staging_prsc, NULL);
- }
+ if (trans->staging_prsc) {
+ if (ptrans->usage & PIPE_MAP_WRITE)
+ fd_blit_from_staging(ctx, trans);
+ pipe_resource_reference(&trans->staging_prsc, NULL);
+ }
- if (!(ptrans->usage & PIPE_MAP_UNSYNCHRONIZED)) {
- fd_bo_cpu_fini(rsc->bo);
- }
+ if (!(ptrans->usage & PIPE_MAP_UNSYNCHRONIZED)) {
+ fd_bo_cpu_fini(rsc->bo);
+ }
- util_range_add(&rsc->b.b, &rsc->valid_buffer_range,
- ptrans->box.x,
- ptrans->box.x + ptrans->box.width);
+ util_range_add(&rsc->b.b, &rsc->valid_buffer_range, ptrans->box.x,
+ ptrans->box.x + ptrans->box.width);
- pipe_resource_reference(&ptrans->resource, NULL);
+ pipe_resource_reference(&ptrans->resource, NULL);
- assert(trans->b.staging == NULL); /* for threaded context only */
+ assert(trans->b.staging == NULL); /* for threaded context only */
- /* Don't use pool_transfers_unsync. We are always in the driver
- * thread. Freeing an object into a different pool is allowed.
- */
- slab_free(&ctx->transfer_pool, ptrans);
+ /* Don't use pool_transfers_unsync. We are always in the driver
+ * thread. Freeing an object into a different pool is allowed.
+ */
+ slab_free(&ctx->transfer_pool, ptrans);
}
static unsigned
translate_usage(unsigned usage)
{
- uint32_t op = 0;
+ uint32_t op = 0;
- if (usage & PIPE_MAP_READ)
- op |= DRM_FREEDRENO_PREP_READ;
+ if (usage & PIPE_MAP_READ)
+ op |= DRM_FREEDRENO_PREP_READ;
- if (usage & PIPE_MAP_WRITE)
- op |= DRM_FREEDRENO_PREP_WRITE;
+ if (usage & PIPE_MAP_WRITE)
+ op |= DRM_FREEDRENO_PREP_WRITE;
- return op;
+ return op;
}
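So a read-modify-write map (PIPE_MAP_READ | PIPE_MAP_WRITE) becomes DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_WRITE, which fd_resource_wait() and fd_resource_busy() take as their op argument and which __fd_resource_wait() ultimately hands to fd_bo_cpu_prep().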
static void
-invalidate_resource(struct fd_resource *rsc, unsigned usage)
- assert_dt
+invalidate_resource(struct fd_resource *rsc, unsigned usage) assert_dt
{
- bool needs_flush = pending(rsc, !!(usage & PIPE_MAP_WRITE));
- unsigned op = translate_usage(usage);
-
- if (needs_flush || fd_resource_busy(rsc, op)) {
- rebind_resource(rsc);
- realloc_bo(rsc, fd_bo_size(rsc->bo));
- } else {
- util_range_set_empty(&rsc->valid_buffer_range);
- }
+ bool needs_flush = pending(rsc, !!(usage & PIPE_MAP_WRITE));
+ unsigned op = translate_usage(usage);
+
+ if (needs_flush || fd_resource_busy(rsc, op)) {
+ rebind_resource(rsc);
+ realloc_bo(rsc, fd_bo_size(rsc->bo));
+ } else {
+ util_range_set_empty(&rsc->valid_buffer_range);
+ }
}
static void *
resource_transfer_map_unsync(struct pipe_context *pctx,
- struct pipe_resource *prsc,
- unsigned level, unsigned usage,
- const struct pipe_box *box,
- struct fd_transfer *trans)
+ struct pipe_resource *prsc, unsigned level,
+ unsigned usage, const struct pipe_box *box,
+ struct fd_transfer *trans)
{
- struct fd_resource *rsc = fd_resource(prsc);
- enum pipe_format format = prsc->format;
- uint32_t offset;
- char *buf;
+ struct fd_resource *rsc = fd_resource(prsc);
+ enum pipe_format format = prsc->format;
+ uint32_t offset;
+ char *buf;
- buf = fd_bo_map(rsc->bo);
- offset =
- box->y / util_format_get_blockheight(format) * trans->b.b.stride +
- box->x / util_format_get_blockwidth(format) * rsc->layout.cpp +
- fd_resource_offset(rsc, level, box->z);
+ buf = fd_bo_map(rsc->bo);
+ offset = box->y / util_format_get_blockheight(format) * trans->b.b.stride +
+ box->x / util_format_get_blockwidth(format) * rsc->layout.cpp +
+ fd_resource_offset(rsc, level, box->z);
- if (usage & PIPE_MAP_WRITE)
- rsc->valid = true;
+ if (usage & PIPE_MAP_WRITE)
+ rsc->valid = true;
- return buf + offset;
+ return buf + offset;
}
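To make the offset arithmetic concrete, assuming layout.cpp holds the byte size of one format block (which the blockwidth/blockheight divisions imply): for a 4x4 block-compressed format with 8-byte blocks, mapping box (x=8, y=12) at level 0, layer 0 gives offset = (12/4) * stride + (8/4) * 8 + fd_resource_offset(rsc, 0, 0), i.e. three block-rows down and two blocks in from the start of the level.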
/**
* either driver or frontend thread.
*/
static void *
-resource_transfer_map(struct pipe_context *pctx,
- struct pipe_resource *prsc,
- unsigned level, unsigned usage,
- const struct pipe_box *box,
- struct fd_transfer *trans)
- in_dt
+resource_transfer_map(struct pipe_context *pctx, struct pipe_resource *prsc,
+ unsigned level, unsigned usage,
+ const struct pipe_box *box,
+ struct fd_transfer *trans) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_resource *rsc = fd_resource(prsc);
- char *buf;
- int ret = 0;
-
- tc_assert_driver_thread(ctx->tc);
-
- /* we always need a staging texture for tiled buffers:
- *
- * TODO we might sometimes want to *also* shadow the resource to avoid
- * splitting a batch.. for ex, mid-frame texture uploads to a tiled
- * texture.
- */
- if (rsc->layout.tile_mode) {
- struct fd_resource *staging_rsc;
-
- assert(prsc->target != PIPE_BUFFER);
-
- staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
- if (staging_rsc) {
- trans->staging_prsc = &staging_rsc->b.b;
- trans->b.b.stride = fd_resource_pitch(staging_rsc, 0);
- trans->b.b.layer_stride = fd_resource_layer_stride(staging_rsc, 0);
- trans->staging_box = *box;
- trans->staging_box.x = 0;
- trans->staging_box.y = 0;
- trans->staging_box.z = 0;
-
- if (usage & PIPE_MAP_READ) {
- fd_blit_to_staging(ctx, trans);
-
- fd_resource_wait(ctx, staging_rsc,
- DRM_FREEDRENO_PREP_READ);
- }
-
- buf = fd_bo_map(staging_rsc->bo);
-
- ctx->stats.staging_uploads++;
-
- return buf;
- }
- }
-
- if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
- invalidate_resource(rsc, usage);
- } else {
- struct fd_batch *write_batch = NULL;
-
- /* hold a reference, so it doesn't disappear under us: */
- fd_screen_lock(ctx->screen);
- fd_batch_reference_locked(&write_batch, rsc->track->write_batch);
- fd_screen_unlock(ctx->screen);
-
- if ((usage & PIPE_MAP_WRITE) && write_batch &&
- write_batch->back_blit) {
- /* if only thing pending is a back-blit, we can discard it: */
- fd_batch_reset(write_batch);
- }
-
- unsigned op = translate_usage(usage);
- bool needs_flush = pending(rsc, !!(usage & PIPE_MAP_WRITE));
-
- /* If the GPU is writing to the resource, or if it is reading from the
- * resource and we're trying to write to it, flush the renders.
- */
- bool busy = needs_flush || fd_resource_busy(rsc, op);
-
- /* if we need to flush/stall, see if we can make a shadow buffer
- * to avoid this:
- *
- * TODO we could go down this path !reorder && !busy_for_read
- * ie. we only *don't* want to go down this path if the blit
- * will trigger a flush!
- */
- if (ctx->screen->reorder && busy && !(usage & PIPE_MAP_READ) &&
- (usage & PIPE_MAP_DISCARD_RANGE)) {
- assert(!(usage & TC_TRANSFER_MAP_NO_INVALIDATE));
-
- /* try shadowing only if it avoids a flush, otherwise staging would
- * be better:
- */
- if (needs_flush && fd_try_shadow_resource(ctx, rsc, level,
- box, DRM_FORMAT_MOD_LINEAR)) {
- needs_flush = busy = false;
- ctx->stats.shadow_uploads++;
- } else {
- struct fd_resource *staging_rsc;
-
- if (needs_flush) {
- flush_resource(ctx, rsc, usage);
- needs_flush = false;
- }
-
- /* in this case, we don't need to shadow the whole resource,
- * since any draw that references the previous contents has
- * already had rendering flushed for all tiles. So we can
- * use a staging buffer to do the upload.
- */
- staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
- if (staging_rsc) {
- trans->staging_prsc = &staging_rsc->b.b;
- trans->b.b.stride = fd_resource_pitch(staging_rsc, 0);
- trans->b.b.layer_stride =
- fd_resource_layer_stride(staging_rsc, 0);
- trans->staging_box = *box;
- trans->staging_box.x = 0;
- trans->staging_box.y = 0;
- trans->staging_box.z = 0;
- buf = fd_bo_map(staging_rsc->bo);
-
- fd_batch_reference(&write_batch, NULL);
-
- ctx->stats.staging_uploads++;
-
- return buf;
- }
- }
- }
-
- if (needs_flush) {
- flush_resource(ctx, rsc, usage);
- needs_flush = false;
- }
-
- fd_batch_reference(&write_batch, NULL);
-
- /* The GPU keeps track of how the various bo's are being used, and
- * will wait if necessary for the proper operation to have
- * completed.
- */
- if (busy) {
- ret = fd_resource_wait(ctx, rsc, op);
- if (ret)
- return NULL;
- }
- }
-
- return resource_transfer_map_unsync(pctx, prsc, level, usage, box, trans);
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_resource *rsc = fd_resource(prsc);
+ char *buf;
+ int ret = 0;
+
+ tc_assert_driver_thread(ctx->tc);
+
+ /* we always need a staging texture for tiled buffers:
+ *
+ * TODO we might sometimes want to *also* shadow the resource to avoid
+ * splitting a batch.. for ex, mid-frame texture uploads to a tiled
+ * texture.
+ */
+ if (rsc->layout.tile_mode) {
+ struct fd_resource *staging_rsc;
+
+ assert(prsc->target != PIPE_BUFFER);
+
+ staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
+ if (staging_rsc) {
+ trans->staging_prsc = &staging_rsc->b.b;
+ trans->b.b.stride = fd_resource_pitch(staging_rsc, 0);
+ trans->b.b.layer_stride = fd_resource_layer_stride(staging_rsc, 0);
+ trans->staging_box = *box;
+ trans->staging_box.x = 0;
+ trans->staging_box.y = 0;
+ trans->staging_box.z = 0;
+
+ if (usage & PIPE_MAP_READ) {
+ fd_blit_to_staging(ctx, trans);
+
+ fd_resource_wait(ctx, staging_rsc, DRM_FREEDRENO_PREP_READ);
+ }
+
+ buf = fd_bo_map(staging_rsc->bo);
+
+ ctx->stats.staging_uploads++;
+
+ return buf;
+ }
+ }
+
+ if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
+ invalidate_resource(rsc, usage);
+ } else {
+ struct fd_batch *write_batch = NULL;
+
+ /* hold a reference, so it doesn't disappear under us: */
+ fd_screen_lock(ctx->screen);
+ fd_batch_reference_locked(&write_batch, rsc->track->write_batch);
+ fd_screen_unlock(ctx->screen);
+
+ if ((usage & PIPE_MAP_WRITE) && write_batch && write_batch->back_blit) {
+ /* if only thing pending is a back-blit, we can discard it: */
+ fd_batch_reset(write_batch);
+ }
+
+ unsigned op = translate_usage(usage);
+ bool needs_flush = pending(rsc, !!(usage & PIPE_MAP_WRITE));
+
+ /* If the GPU is writing to the resource, or if it is reading from the
+ * resource and we're trying to write to it, flush the renders.
+ */
+ bool busy = needs_flush || fd_resource_busy(rsc, op);
+
+ /* if we need to flush/stall, see if we can make a shadow buffer
+ * to avoid this:
+ *
+    * TODO we could go down this path when !reorder && !busy_for_read
+ * ie. we only *don't* want to go down this path if the blit
+ * will trigger a flush!
+ */
+ if (ctx->screen->reorder && busy && !(usage & PIPE_MAP_READ) &&
+ (usage & PIPE_MAP_DISCARD_RANGE)) {
+ assert(!(usage & TC_TRANSFER_MAP_NO_INVALIDATE));
+
+ /* try shadowing only if it avoids a flush, otherwise staging would
+ * be better:
+ */
+ if (needs_flush && fd_try_shadow_resource(ctx, rsc, level, box,
+ DRM_FORMAT_MOD_LINEAR)) {
+ needs_flush = busy = false;
+ ctx->stats.shadow_uploads++;
+ } else {
+ struct fd_resource *staging_rsc;
+
+ if (needs_flush) {
+ flush_resource(ctx, rsc, usage);
+ needs_flush = false;
+ }
+
+ /* in this case, we don't need to shadow the whole resource,
+ * since any draw that references the previous contents has
+ * already had rendering flushed for all tiles. So we can
+ * use a staging buffer to do the upload.
+ */
+ staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
+ if (staging_rsc) {
+ trans->staging_prsc = &staging_rsc->b.b;
+ trans->b.b.stride = fd_resource_pitch(staging_rsc, 0);
+ trans->b.b.layer_stride =
+ fd_resource_layer_stride(staging_rsc, 0);
+ trans->staging_box = *box;
+ trans->staging_box.x = 0;
+ trans->staging_box.y = 0;
+ trans->staging_box.z = 0;
+ buf = fd_bo_map(staging_rsc->bo);
+
+ fd_batch_reference(&write_batch, NULL);
+
+ ctx->stats.staging_uploads++;
+
+ return buf;
+ }
+ }
+ }
+
+ if (needs_flush) {
+ flush_resource(ctx, rsc, usage);
+ needs_flush = false;
+ }
+
+ fd_batch_reference(&write_batch, NULL);
+
+ /* The GPU keeps track of how the various bo's are being used, and
+ * will wait if necessary for the proper operation to have
+ * completed.
+ */
+ if (busy) {
+ ret = fd_resource_wait(ctx, rsc, op);
+ if (ret)
+ return NULL;
+ }
+ }
+
+ return resource_transfer_map_unsync(pctx, prsc, level, usage, box, trans);
}
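Concretely, for a PIPE_MAP_DISCARD_RANGE write while the GPU still has pending work against the resource: with a reordering screen the code first tries to shadow (swap in a fresh BO and back-blit the untouched levels, avoiding any flush); failing that it flushes and falls back to a staging BO, which fd_blit_from_staging() copies back at unmap time; only when neither path applies does it stall in fd_resource_wait().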
static unsigned
improve_transfer_map_usage(struct fd_context *ctx, struct fd_resource *rsc,
- unsigned usage, const struct pipe_box *box)
- /* Not *strictly* true, but the access to things that must only be in driver-
- * thread are protected by !(usage & TC_TRANSFER_MAP_THREADED_UNSYNC):
- */
- in_dt
+ unsigned usage, const struct pipe_box *box)
+ /* Not *strictly* true, but the access to things that must only be in driver-
+ * thread are protected by !(usage & TC_TRANSFER_MAP_THREADED_UNSYNC):
+ */
+ in_dt
{
- if (usage & TC_TRANSFER_MAP_NO_INVALIDATE) {
- usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
- usage &= ~PIPE_MAP_DISCARD_RANGE;
- }
-
- if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
- usage |= PIPE_MAP_UNSYNCHRONIZED;
-
- if (!(usage & (TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED |
- PIPE_MAP_UNSYNCHRONIZED))) {
- if (ctx->in_shadow && !(usage & PIPE_MAP_READ)) {
- usage |= PIPE_MAP_UNSYNCHRONIZED;
- } else if ((usage & PIPE_MAP_WRITE) &&
- (rsc->b.b.target == PIPE_BUFFER) &&
- !util_ranges_intersect(&rsc->valid_buffer_range,
- box->x, box->x + box->width)) {
- /* We are trying to write to a previously uninitialized range. No need
- * to synchronize.
- */
- usage |= PIPE_MAP_UNSYNCHRONIZED;
- }
- }
-
- return usage;
+ if (usage & TC_TRANSFER_MAP_NO_INVALIDATE) {
+ usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
+ usage &= ~PIPE_MAP_DISCARD_RANGE;
+ }
+
+ if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
+ usage |= PIPE_MAP_UNSYNCHRONIZED;
+
+ if (!(usage &
+ (TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED | PIPE_MAP_UNSYNCHRONIZED))) {
+ if (ctx->in_shadow && !(usage & PIPE_MAP_READ)) {
+ usage |= PIPE_MAP_UNSYNCHRONIZED;
+ } else if ((usage & PIPE_MAP_WRITE) && (rsc->b.b.target == PIPE_BUFFER) &&
+ !util_ranges_intersect(&rsc->valid_buffer_range, box->x,
+ box->x + box->width)) {
+ /* We are trying to write to a previously uninitialized range. No need
+ * to synchronize.
+ */
+ usage |= PIPE_MAP_UNSYNCHRONIZED;
+ }
+ }
+
+ return usage;
}
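For example, a write-only map of bytes [4096, 8192) of a PIPE_BUFFER whose valid_buffer_range so far covers only [0, 4096) does not intersect the valid range, so the usage is promoted to PIPE_MAP_UNSYNCHRONIZED and fd_resource_transfer_map() below takes the resource_transfer_map_unsync() path with no flush or stall.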
static void *
-fd_resource_transfer_map(struct pipe_context *pctx,
- struct pipe_resource *prsc,
- unsigned level, unsigned usage,
- const struct pipe_box *box,
- struct pipe_transfer **pptrans)
+fd_resource_transfer_map(struct pipe_context *pctx, struct pipe_resource *prsc,
+ unsigned level, unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **pptrans)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_resource *rsc = fd_resource(prsc);
- struct fd_transfer *trans;
- struct pipe_transfer *ptrans;
-
- DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
- box->width, box->height, box->x, box->y);
-
- if ((usage & PIPE_MAP_DIRECTLY) && rsc->layout.tile_mode) {
- DBG("CANNOT MAP DIRECTLY!\n");
- return NULL;
- }
-
- if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC) {
- ptrans = slab_alloc(&ctx->transfer_pool_unsync);
- } else {
- ptrans = slab_alloc(&ctx->transfer_pool);
- }
-
- if (!ptrans)
- return NULL;
-
- /* slab_alloc_st() doesn't zero: */
- trans = fd_transfer(ptrans);
- memset(trans, 0, sizeof(*trans));
-
- usage = improve_transfer_map_usage(ctx, rsc, usage, box);
-
- pipe_resource_reference(&ptrans->resource, prsc);
- ptrans->level = level;
- ptrans->usage = usage;
- ptrans->box = *box;
- ptrans->stride = fd_resource_pitch(rsc, level);
- ptrans->layer_stride = fd_resource_layer_stride(rsc, level);
-
- void *ret;
- if (usage & PIPE_MAP_UNSYNCHRONIZED) {
- ret = resource_transfer_map_unsync(pctx, prsc, level, usage, box, trans);
- } else {
- ret = resource_transfer_map(pctx, prsc, level, usage, box, trans);
- }
-
- if (ret) {
- *pptrans = ptrans;
- } else {
- fd_resource_transfer_unmap(pctx, ptrans);
- }
-
- return ret;
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_resource *rsc = fd_resource(prsc);
+ struct fd_transfer *trans;
+ struct pipe_transfer *ptrans;
+
+ DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
+ box->width, box->height, box->x, box->y);
+
+ if ((usage & PIPE_MAP_DIRECTLY) && rsc->layout.tile_mode) {
+ DBG("CANNOT MAP DIRECTLY!\n");
+ return NULL;
+ }
+
+ if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC) {
+ ptrans = slab_alloc(&ctx->transfer_pool_unsync);
+ } else {
+ ptrans = slab_alloc(&ctx->transfer_pool);
+ }
+
+ if (!ptrans)
+ return NULL;
+
+ /* slab_alloc_st() doesn't zero: */
+ trans = fd_transfer(ptrans);
+ memset(trans, 0, sizeof(*trans));
+
+ usage = improve_transfer_map_usage(ctx, rsc, usage, box);
+
+ pipe_resource_reference(&ptrans->resource, prsc);
+ ptrans->level = level;
+ ptrans->usage = usage;
+ ptrans->box = *box;
+ ptrans->stride = fd_resource_pitch(rsc, level);
+ ptrans->layer_stride = fd_resource_layer_stride(rsc, level);
+
+ void *ret;
+ if (usage & PIPE_MAP_UNSYNCHRONIZED) {
+ ret = resource_transfer_map_unsync(pctx, prsc, level, usage, box, trans);
+ } else {
+ ret = resource_transfer_map(pctx, prsc, level, usage, box, trans);
+ }
+
+ if (ret) {
+ *pptrans = ptrans;
+ } else {
+ fd_resource_transfer_unmap(pctx, ptrans);
+ }
+
+ return ret;
}
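For context, a hedged sketch of what a frontend-side caller of this path looks like (pctx and buf are assumed to exist; the call reaches fd_resource_transfer_map through u_transfer_helper):

struct pipe_transfer *xfer;
struct pipe_box box;
u_box_1d(0, 64, &box);                         /* map the first 64 bytes */
void *ptr = pctx->transfer_map(pctx, buf, 0 /* level */,
                               PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
                               &box, &xfer);
if (ptr) {
   memset(ptr, 0, 64);
   pctx->transfer_unmap(pctx, xfer);
}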
static void
-fd_resource_destroy(struct pipe_screen *pscreen,
- struct pipe_resource *prsc)
+fd_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc)
{
- struct fd_resource *rsc = fd_resource(prsc);
+ struct fd_resource *rsc = fd_resource(prsc);
- if (!rsc->is_replacement)
- fd_bc_invalidate_resource(rsc, true);
- if (rsc->bo)
- fd_bo_del(rsc->bo);
- if (rsc->lrz)
- fd_bo_del(rsc->lrz);
- if (rsc->scanout)
- renderonly_scanout_destroy(rsc->scanout, fd_screen(pscreen)->ro);
+ if (!rsc->is_replacement)
+ fd_bc_invalidate_resource(rsc, true);
+ if (rsc->bo)
+ fd_bo_del(rsc->bo);
+ if (rsc->lrz)
+ fd_bo_del(rsc->lrz);
+ if (rsc->scanout)
+ renderonly_scanout_destroy(rsc->scanout, fd_screen(pscreen)->ro);
- threaded_resource_deinit(prsc);
+ threaded_resource_deinit(prsc);
- util_range_destroy(&rsc->valid_buffer_range);
- simple_mtx_destroy(&rsc->lock);
- fd_resource_tracking_reference(&rsc->track, NULL);
+ util_range_destroy(&rsc->valid_buffer_range);
+ simple_mtx_destroy(&rsc->lock);
+ fd_resource_tracking_reference(&rsc->track, NULL);
- FREE(rsc);
+ FREE(rsc);
}
static uint64_t
fd_resource_modifier(struct fd_resource *rsc)
{
- if (!rsc->layout.tile_mode)
- return DRM_FORMAT_MOD_LINEAR;
+ if (!rsc->layout.tile_mode)
+ return DRM_FORMAT_MOD_LINEAR;
- if (rsc->layout.ubwc_layer_size)
- return DRM_FORMAT_MOD_QCOM_COMPRESSED;
+ if (rsc->layout.ubwc_layer_size)
+ return DRM_FORMAT_MOD_QCOM_COMPRESSED;
- /* TODO invent a modifier for tiled but not UBWC buffers: */
- return DRM_FORMAT_MOD_INVALID;
+ /* TODO invent a modifier for tiled but not UBWC buffers: */
+ return DRM_FORMAT_MOD_INVALID;
}
static bool
-fd_resource_get_handle(struct pipe_screen *pscreen,
- struct pipe_context *pctx,
- struct pipe_resource *prsc,
- struct winsys_handle *handle,
- unsigned usage)
+fd_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *pctx,
+ struct pipe_resource *prsc, struct winsys_handle *handle,
+ unsigned usage)
{
- struct fd_resource *rsc = fd_resource(prsc);
+ struct fd_resource *rsc = fd_resource(prsc);
- rsc->b.is_shared = true;
+ rsc->b.is_shared = true;
- handle->modifier = fd_resource_modifier(rsc);
+ handle->modifier = fd_resource_modifier(rsc);
- DBG("%"PRSC_FMT", modifier=%"PRIx64, PRSC_ARGS(prsc), handle->modifier);
+ DBG("%" PRSC_FMT ", modifier=%" PRIx64, PRSC_ARGS(prsc), handle->modifier);
- return fd_screen_bo_get_handle(pscreen, rsc->bo, rsc->scanout,
- fd_resource_pitch(rsc, 0), handle);
+ return fd_screen_bo_get_handle(pscreen, rsc->bo, rsc->scanout,
+ fd_resource_pitch(rsc, 0), handle);
}
/* special case to resize query buf after allocated.. */
void
fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
{
- struct fd_resource *rsc = fd_resource(prsc);
+ struct fd_resource *rsc = fd_resource(prsc);
- debug_assert(prsc->width0 == 0);
- debug_assert(prsc->target == PIPE_BUFFER);
- debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
+ debug_assert(prsc->width0 == 0);
+ debug_assert(prsc->target == PIPE_BUFFER);
+ debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
- prsc->width0 = sz;
- realloc_bo(rsc, fd_screen(prsc->screen)->setup_slices(rsc));
+ prsc->width0 = sz;
+ realloc_bo(rsc, fd_screen(prsc->screen)->setup_slices(rsc));
}
static void
fd_resource_layout_init(struct pipe_resource *prsc)
{
- struct fd_resource *rsc = fd_resource(prsc);
- struct fdl_layout *layout = &rsc->layout;
+ struct fd_resource *rsc = fd_resource(prsc);
+ struct fdl_layout *layout = &rsc->layout;
- layout->format = prsc->format;
+ layout->format = prsc->format;
- layout->width0 = prsc->width0;
- layout->height0 = prsc->height0;
- layout->depth0 = prsc->depth0;
+ layout->width0 = prsc->width0;
+ layout->height0 = prsc->height0;
+ layout->depth0 = prsc->depth0;
- layout->cpp = util_format_get_blocksize(prsc->format);
- layout->cpp *= fd_resource_nr_samples(prsc);
- layout->cpp_shift = ffs(layout->cpp) - 1;
+ layout->cpp = util_format_get_blocksize(prsc->format);
+ layout->cpp *= fd_resource_nr_samples(prsc);
+ layout->cpp_shift = ffs(layout->cpp) - 1;
}
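Worked example of the cpp/cpp_shift computation above, assuming a hypothetical RGBA8 (4 bytes per block) resource with 4x MSAA; cpp is always a power of two here, so offset math can use shifts instead of multiplies:

#include <assert.h>
#include <strings.h>   /* ffs() */

int
main(void)
{
   unsigned cpp = 4 * 4;               /* blocksize * nr_samples = 16 */
   unsigned cpp_shift = ffs(cpp) - 1;  /* ffs(16) == 5, so shift == 4 */
   unsigned x = 100;
   assert((x * cpp) == (x << cpp_shift));
   return 0;
}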
static struct fd_resource *
-alloc_resource_struct(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
+alloc_resource_struct(struct pipe_screen *pscreen,
+ const struct pipe_resource *tmpl)
{
- struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
+ struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
- if (!rsc)
- return NULL;
+ if (!rsc)
+ return NULL;
- struct pipe_resource *prsc = &rsc->b.b;
- *prsc = *tmpl;
+ struct pipe_resource *prsc = &rsc->b.b;
+ *prsc = *tmpl;
- pipe_reference_init(&prsc->reference, 1);
- prsc->screen = pscreen;
+ pipe_reference_init(&prsc->reference, 1);
+ prsc->screen = pscreen;
- util_range_init(&rsc->valid_buffer_range);
- simple_mtx_init(&rsc->lock, mtx_plain);
+ util_range_init(&rsc->valid_buffer_range);
+ simple_mtx_init(&rsc->lock, mtx_plain);
- rsc->track = CALLOC_STRUCT(fd_resource_tracking);
- if (!rsc->track) {
- free(rsc);
- return NULL;
- }
+ rsc->track = CALLOC_STRUCT(fd_resource_tracking);
+ if (!rsc->track) {
+ free(rsc);
+ return NULL;
+ }
- pipe_reference_init(&rsc->track->reference, 1);
+ pipe_reference_init(&rsc->track->reference, 1);
- return rsc;
+ return rsc;
}
/**
*/
static struct pipe_resource *
fd_resource_allocate_and_resolve(struct pipe_screen *pscreen,
- const struct pipe_resource *tmpl,
- const uint64_t *modifiers, int count, uint32_t *psize)
+ const struct pipe_resource *tmpl,
+ const uint64_t *modifiers, int count,
+ uint32_t *psize)
{
- struct fd_screen *screen = fd_screen(pscreen);
- struct fd_resource *rsc;
- struct pipe_resource *prsc;
- enum pipe_format format = tmpl->format;
- uint32_t size;
-
- rsc = alloc_resource_struct(pscreen, tmpl);
- if (!rsc)
- return NULL;
-
- prsc = &rsc->b.b;
-
- DBG("%"PRSC_FMT, PRSC_ARGS(prsc));
-
- threaded_resource_init(prsc);
-
- if (tmpl->bind & PIPE_BIND_SHARED)
- rsc->b.is_shared = true;
-
- fd_resource_layout_init(prsc);
-
-#define LINEAR \
- (PIPE_BIND_SCANOUT | \
- PIPE_BIND_LINEAR | \
- PIPE_BIND_DISPLAY_TARGET)
-
- bool linear = drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
- if (linear) {
- perf_debug("%"PRSC_FMT": linear: DRM_FORMAT_MOD_LINEAR requested!", PRSC_ARGS(prsc));
- } else if (tmpl->bind & LINEAR) {
- if (tmpl->usage != PIPE_USAGE_STAGING)
- perf_debug("%"PRSC_FMT": linear: LINEAR bind requested!", PRSC_ARGS(prsc));
- linear = true;
- }
-
- if (FD_DBG(NOTILE))
- linear = true;
-
- /* Normally, for non-shared buffers, allow buffer compression if
- * not shared, otherwise only allow if QCOM_COMPRESSED modifier
- * is requested:
- *
- * TODO we should probably also limit tiled in a similar way,
- * except we don't have a format modifier for tiled. (We probably
- * should.)
- */
- bool allow_ubwc = false;
- if (!linear) {
- allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, count);
- if (!allow_ubwc) {
- perf_debug("%"PRSC_FMT": not UBWC: DRM_FORMAT_MOD_INVALID not requested!",
- PRSC_ARGS(prsc));
- }
- if (tmpl->bind & PIPE_BIND_SHARED) {
- allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count);
- if (!allow_ubwc) {
- perf_debug("%"PRSC_FMT": not UBWC: shared and DRM_FORMAT_MOD_QCOM_COMPRESSED not requested!",
- PRSC_ARGS(prsc));
- linear = true;
- }
- }
- }
-
- allow_ubwc &= !FD_DBG(NOUBWC);
-
- if (screen->tile_mode &&
- (tmpl->target != PIPE_BUFFER) &&
- !linear) {
- rsc->layout.tile_mode = screen->tile_mode(prsc);
- }
-
- rsc->internal_format = format;
-
- rsc->layout.ubwc = rsc->layout.tile_mode && is_a6xx(screen) && allow_ubwc;
-
- if (prsc->target == PIPE_BUFFER) {
- assert(prsc->format == PIPE_FORMAT_R8_UNORM);
- size = prsc->width0;
- fdl_layout_buffer(&rsc->layout, size);
- } else {
- size = screen->setup_slices(rsc);
- }
-
- /* special case for hw-query buffer, which we need to allocate before we
- * know the size:
- */
- if (size == 0) {
- /* note, semi-intention == instead of & */
- debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
- *psize = 0;
- return prsc;
- }
-
- /* Set the layer size if the (non-a6xx) backend hasn't done so. */
- if (rsc->layout.layer_first && !rsc->layout.layer_size) {
- rsc->layout.layer_size = align(size, 4096);
- size = rsc->layout.layer_size * prsc->array_size;
- }
-
- if (FD_DBG(LAYOUT))
- fdl_dump_layout(&rsc->layout);
-
- /* Hand out the resolved size. */
- if (psize)
- *psize = size;
-
- return prsc;
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_resource *rsc;
+ struct pipe_resource *prsc;
+ enum pipe_format format = tmpl->format;
+ uint32_t size;
+
+ rsc = alloc_resource_struct(pscreen, tmpl);
+ if (!rsc)
+ return NULL;
+
+ prsc = &rsc->b.b;
+
+ DBG("%" PRSC_FMT, PRSC_ARGS(prsc));
+
+ threaded_resource_init(prsc);
+
+ if (tmpl->bind & PIPE_BIND_SHARED)
+ rsc->b.is_shared = true;
+
+ fd_resource_layout_init(prsc);
+
+#define LINEAR (PIPE_BIND_SCANOUT | PIPE_BIND_LINEAR | PIPE_BIND_DISPLAY_TARGET)
+
+ bool linear = drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
+ if (linear) {
+ perf_debug("%" PRSC_FMT ": linear: DRM_FORMAT_MOD_LINEAR requested!",
+ PRSC_ARGS(prsc));
+ } else if (tmpl->bind & LINEAR) {
+ if (tmpl->usage != PIPE_USAGE_STAGING)
+ perf_debug("%" PRSC_FMT ": linear: LINEAR bind requested!",
+ PRSC_ARGS(prsc));
+ linear = true;
+ }
+
+ if (FD_DBG(NOTILE))
+ linear = true;
+
+ /* Normally, for non-shared buffers, allow buffer compression if
+ * not shared, otherwise only allow if QCOM_COMPRESSED modifier
+ * is requested:
+ *
+ * TODO we should probably also limit tiled in a similar way,
+ * except we don't have a format modifier for tiled. (We probably
+ * should.)
+ */
+ bool allow_ubwc = false;
+ if (!linear) {
+ allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, count);
+ if (!allow_ubwc) {
+ perf_debug("%" PRSC_FMT
+ ": not UBWC: DRM_FORMAT_MOD_INVALID not requested!",
+ PRSC_ARGS(prsc));
+ }
+ if (tmpl->bind & PIPE_BIND_SHARED) {
+ allow_ubwc =
+ drm_find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count);
+ if (!allow_ubwc) {
+ perf_debug("%" PRSC_FMT
+ ": not UBWC: shared and DRM_FORMAT_MOD_QCOM_COMPRESSED "
+ "not requested!",
+ PRSC_ARGS(prsc));
+ linear = true;
+ }
+ }
+ }
+
+ allow_ubwc &= !FD_DBG(NOUBWC);
+
+ if (screen->tile_mode && (tmpl->target != PIPE_BUFFER) && !linear) {
+ rsc->layout.tile_mode = screen->tile_mode(prsc);
+ }
+
+ rsc->internal_format = format;
+
+ rsc->layout.ubwc = rsc->layout.tile_mode && is_a6xx(screen) && allow_ubwc;
+
+ if (prsc->target == PIPE_BUFFER) {
+ assert(prsc->format == PIPE_FORMAT_R8_UNORM);
+ size = prsc->width0;
+ fdl_layout_buffer(&rsc->layout, size);
+ } else {
+ size = screen->setup_slices(rsc);
+ }
+
+ /* special case for hw-query buffer, which we need to allocate before we
+ * know the size:
+ */
+ if (size == 0) {
+ /* note, semi-intention == instead of & */
+ debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
+ *psize = 0;
+ return prsc;
+ }
+
+ /* Set the layer size if the (non-a6xx) backend hasn't done so. */
+ if (rsc->layout.layer_first && !rsc->layout.layer_size) {
+ rsc->layout.layer_size = align(size, 4096);
+ size = rsc->layout.layer_size * prsc->array_size;
+ }
+
+ if (FD_DBG(LAYOUT))
+ fdl_dump_layout(&rsc->layout);
+
+ /* Hand out the resolved size. */
+ if (psize)
+ *psize = size;
+
+ return prsc;
}
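A condensed sketch of the modifier negotiation above (hypothetical helper, not driver code): DRM_FORMAT_MOD_INVALID means "driver's choice" and so permits compression, while shared buffers need the explicit QCOM_COMPRESSED modifier so the consumer knows how to decode the BO:

static bool
want_ubwc(const uint64_t *modifiers, int count, bool shared)
{
   bool implicit = false, explicit_ubwc = false;

   for (int i = 0; i < count; i++) {
      if (modifiers[i] == DRM_FORMAT_MOD_INVALID)
         implicit = true;        /* "driver picks" -> compression allowed */
      if (modifiers[i] == DRM_FORMAT_MOD_QCOM_COMPRESSED)
         explicit_ubwc = true;   /* explicit opt-in, required when shared */
   }

   return shared ? explicit_ubwc : implicit;
}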
/**
*/
static struct pipe_resource *
fd_resource_create_with_modifiers(struct pipe_screen *pscreen,
- const struct pipe_resource *tmpl,
- const uint64_t *modifiers, int count)
+ const struct pipe_resource *tmpl,
+ const uint64_t *modifiers, int count)
{
- struct fd_screen *screen = fd_screen(pscreen);
- struct fd_resource *rsc;
- struct pipe_resource *prsc;
- uint32_t size;
-
- /* when using kmsro, scanout buffers are allocated on the display device
- * create_with_modifiers() doesn't give us usage flags, so we have to
- * assume that all calls with modifiers are scanout-possible
- */
- if (screen->ro &&
- ((tmpl->bind & PIPE_BIND_SCANOUT) ||
- !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) {
- struct pipe_resource scanout_templat = *tmpl;
- struct renderonly_scanout *scanout;
- struct winsys_handle handle;
-
- /* note: alignment is wrong for a6xx */
- scanout_templat.width0 = align(tmpl->width0, screen->info.gmem_align_w);
-
- scanout = renderonly_scanout_for_resource(&scanout_templat,
- screen->ro, &handle);
- if (!scanout)
- return NULL;
-
- renderonly_scanout_destroy(scanout, screen->ro);
-
- assert(handle.type == WINSYS_HANDLE_TYPE_FD);
- rsc = fd_resource(pscreen->resource_from_handle(pscreen, tmpl,
- &handle,
- PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE));
- close(handle.handle);
- if (!rsc)
- return NULL;
-
- return &rsc->b.b;
- }
-
- prsc = fd_resource_allocate_and_resolve(pscreen, tmpl, modifiers, count, &size);
- if (!prsc)
- return NULL;
- rsc = fd_resource(prsc);
-
- realloc_bo(rsc, size);
- if (!rsc->bo)
- goto fail;
-
- return prsc;
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_resource *rsc;
+ struct pipe_resource *prsc;
+ uint32_t size;
+
+ /* when using kmsro, scanout buffers are allocated on the display device
+ * create_with_modifiers() doesn't give us usage flags, so we have to
+ * assume that all calls with modifiers are scanout-possible
+ */
+ if (screen->ro &&
+ ((tmpl->bind & PIPE_BIND_SCANOUT) ||
+ !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) {
+ struct pipe_resource scanout_templat = *tmpl;
+ struct renderonly_scanout *scanout;
+ struct winsys_handle handle;
+
+ /* note: alignment is wrong for a6xx */
+ scanout_templat.width0 = align(tmpl->width0, screen->info.gmem_align_w);
+
+ scanout =
+ renderonly_scanout_for_resource(&scanout_templat, screen->ro, &handle);
+ if (!scanout)
+ return NULL;
+
+ renderonly_scanout_destroy(scanout, screen->ro);
+
+ assert(handle.type == WINSYS_HANDLE_TYPE_FD);
+ rsc = fd_resource(pscreen->resource_from_handle(
+ pscreen, tmpl, &handle, PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE));
+ close(handle.handle);
+ if (!rsc)
+ return NULL;
+
+ return &rsc->b.b;
+ }
+
+ prsc =
+ fd_resource_allocate_and_resolve(pscreen, tmpl, modifiers, count, &size);
+ if (!prsc)
+ return NULL;
+ rsc = fd_resource(prsc);
+
+ realloc_bo(rsc, size);
+ if (!rsc->bo)
+ goto fail;
+
+ return prsc;
fail:
- fd_resource_destroy(pscreen, prsc);
- return NULL;
+ fd_resource_destroy(pscreen, prsc);
+ return NULL;
}
static struct pipe_resource *
fd_resource_create(struct pipe_screen *pscreen,
- const struct pipe_resource *tmpl)
+ const struct pipe_resource *tmpl)
{
- const uint64_t mod = DRM_FORMAT_MOD_INVALID;
- return fd_resource_create_with_modifiers(pscreen, tmpl, &mod, 1);
+ const uint64_t mod = DRM_FORMAT_MOD_INVALID;
+ return fd_resource_create_with_modifiers(pscreen, tmpl, &mod, 1);
}
/**
*/
static struct pipe_resource *
fd_resource_from_handle(struct pipe_screen *pscreen,
- const struct pipe_resource *tmpl,
- struct winsys_handle *handle, unsigned usage)
+ const struct pipe_resource *tmpl,
+ struct winsys_handle *handle, unsigned usage)
{
- struct fd_screen *screen = fd_screen(pscreen);
- struct fd_resource *rsc = alloc_resource_struct(pscreen, tmpl);
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_resource *rsc = alloc_resource_struct(pscreen, tmpl);
- if (!rsc)
- return NULL;
+ if (!rsc)
+ return NULL;
- struct fdl_slice *slice = fd_resource_slice(rsc, 0);
- struct pipe_resource *prsc = &rsc->b.b;
+ struct fdl_slice *slice = fd_resource_slice(rsc, 0);
+ struct pipe_resource *prsc = &rsc->b.b;
- DBG("%"PRSC_FMT", modifier=%"PRIx64, PRSC_ARGS(prsc), handle->modifier);
+ DBG("%" PRSC_FMT ", modifier=%" PRIx64, PRSC_ARGS(prsc), handle->modifier);
- threaded_resource_init(prsc);
- rsc->b.is_shared = true;
+ threaded_resource_init(prsc);
+ rsc->b.is_shared = true;
- fd_resource_layout_init(prsc);
+ fd_resource_layout_init(prsc);
- struct fd_bo *bo = fd_screen_bo_from_handle(pscreen, handle);
- if (!bo)
- goto fail;
+ struct fd_bo *bo = fd_screen_bo_from_handle(pscreen, handle);
+ if (!bo)
+ goto fail;
- fd_resource_set_bo(rsc, bo);
+ fd_resource_set_bo(rsc, bo);
- rsc->internal_format = tmpl->format;
- rsc->layout.pitch0 = handle->stride;
- slice->offset = handle->offset;
- slice->size0 = handle->stride * prsc->height0;
+ rsc->internal_format = tmpl->format;
+ rsc->layout.pitch0 = handle->stride;
+ slice->offset = handle->offset;
+ slice->size0 = handle->stride * prsc->height0;
- /* use a pitchalign of gmem_align_w pixels, because GMEM resolve for
- * lower alignments is not implemented (but possible for a6xx at least)
- *
- * for UBWC-enabled resources, layout_resource_for_modifier will further
- * validate the pitch and set the right pitchalign
- */
- rsc->layout.pitchalign =
- fdl_cpp_shift(&rsc->layout) + util_logbase2(screen->info.gmem_align_w);
+ /* use a pitchalign of gmem_align_w pixels, because GMEM resolve for
+ * lower alignments is not implemented (but possible for a6xx at least)
+ *
+ * for UBWC-enabled resources, layout_resource_for_modifier will further
+ * validate the pitch and set the right pitchalign
+ */
+ rsc->layout.pitchalign =
+ fdl_cpp_shift(&rsc->layout) + util_logbase2(screen->info.gmem_align_w);
- /* apply the minimum pitchalign (note: actually 4 for a3xx but doesn't matter) */
- if (is_a6xx(screen) || is_a5xx(screen))
- rsc->layout.pitchalign = MAX2(rsc->layout.pitchalign, 6);
- else
- rsc->layout.pitchalign = MAX2(rsc->layout.pitchalign, 5);
+ /* apply the minimum pitchalign (note: actually 4 for a3xx but doesn't
+ * matter) */
+ if (is_a6xx(screen) || is_a5xx(screen))
+ rsc->layout.pitchalign = MAX2(rsc->layout.pitchalign, 6);
+ else
+ rsc->layout.pitchalign = MAX2(rsc->layout.pitchalign, 5);
- if (rsc->layout.pitch0 < (prsc->width0 * rsc->layout.cpp) ||
- fd_resource_pitch(rsc, 0) != rsc->layout.pitch0)
- goto fail;
+ if (rsc->layout.pitch0 < (prsc->width0 * rsc->layout.cpp) ||
+ fd_resource_pitch(rsc, 0) != rsc->layout.pitch0)
+ goto fail;
- assert(rsc->layout.cpp);
+ assert(rsc->layout.cpp);
- if (screen->layout_resource_for_modifier(rsc, handle->modifier) < 0)
- goto fail;
+ if (screen->layout_resource_for_modifier(rsc, handle->modifier) < 0)
+ goto fail;
- if (screen->ro) {
- rsc->scanout =
- renderonly_create_gpu_import_for_resource(prsc, screen->ro, NULL);
- /* failure is expected in some cases.. */
- }
+ if (screen->ro) {
+ rsc->scanout =
+ renderonly_create_gpu_import_for_resource(prsc, screen->ro, NULL);
+ /* failure is expected in some cases.. */
+ }
- rsc->valid = true;
+ rsc->valid = true;
- return prsc;
+ return prsc;
fail:
- fd_resource_destroy(pscreen, prsc);
- return NULL;
+ fd_resource_destroy(pscreen, prsc);
+ return NULL;
}
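Worked example of the pitchalign arithmetic above (values hypothetical): pitchalign is the log2 of the required pitch alignment in bytes, so cpp_shift = 2 (4 bytes per pixel) and a 32-pixel gmem_align_w give 128-byte-aligned pitches, and the MAX2(..., 6) clamp guarantees at least 64 bytes on a5xx/a6xx:

#include <assert.h>

int
main(void)
{
   unsigned cpp_shift = 2;          /* log2(4 bytes per pixel)       */
   unsigned log2_gmem_align_w = 5;  /* log2(32-pixel GMEM alignment) */
   unsigned pitchalign = cpp_shift + log2_gmem_align_w;
   assert((1u << pitchalign) == 128);
   return 0;
}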
bool
fd_render_condition_check(struct pipe_context *pctx)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- if (!ctx->cond_query)
- return true;
+ if (!ctx->cond_query)
+ return true;
- union pipe_query_result res = { 0 };
- bool wait =
- ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
- ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
+ union pipe_query_result res = {0};
+ bool wait = ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
+ ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
- if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
- return (bool)res.u64 != ctx->cond_cond;
+ if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
+ return (bool)res.u64 != ctx->cond_cond;
- return true;
+ return true;
}
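The predicate at the end inverts per the condition mode; a small worked example of that last comparison (standalone, hypothetical values):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
render_allowed(uint64_t query_result, bool cond_cond)
{
   return (bool)query_result != cond_cond;
}

int
main(void)
{
   /* "render if samples passed": nonzero result -> draw proceeds */
   assert(render_allowed(100, false));
   /* inverted condition: nonzero result -> draw is skipped */
   assert(!render_allowed(100, true));
   /* note the function above defaults to rendering if the query fails */
   return 0;
}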
static void
-fd_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
- in_dt
+fd_invalidate_resource(struct pipe_context *pctx,
+ struct pipe_resource *prsc) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_resource *rsc = fd_resource(prsc);
-
- if (prsc->target == PIPE_BUFFER) {
- /* Handle the glInvalidateBufferData() case:
- */
- invalidate_resource(rsc, PIPE_MAP_READ | PIPE_MAP_WRITE);
- } else if (rsc->track->write_batch) {
- /* Handle the glInvalidateFramebuffer() case, telling us that
- * we can skip resolve.
- */
-
- struct fd_batch *batch = rsc->track->write_batch;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
-
- if (pfb->zsbuf && pfb->zsbuf->texture == prsc) {
- batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
- fd_context_dirty(ctx, FD_DIRTY_ZSA);
- }
-
- for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
- if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) {
- batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
- fd_context_dirty(ctx, FD_DIRTY_FRAMEBUFFER);
- }
- }
- }
-
- rsc->valid = false;
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_resource *rsc = fd_resource(prsc);
+
+ if (prsc->target == PIPE_BUFFER) {
+ /* Handle the glInvalidateBufferData() case:
+ */
+ invalidate_resource(rsc, PIPE_MAP_READ | PIPE_MAP_WRITE);
+ } else if (rsc->track->write_batch) {
+ /* Handle the glInvalidateFramebuffer() case, telling us that
+ * we can skip resolve.
+ */
+
+ struct fd_batch *batch = rsc->track->write_batch;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
+ if (pfb->zsbuf && pfb->zsbuf->texture == prsc) {
+ batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
+ fd_context_dirty(ctx, FD_DIRTY_ZSA);
+ }
+
+ for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
+ if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) {
+ batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
+ fd_context_dirty(ctx, FD_DIRTY_FRAMEBUFFER);
+ }
+ }
+ }
+
+ rsc->valid = false;
}
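Each color attachment owns one bit in batch->resolve (PIPE_CLEAR_COLOR0 shifted by the cbuf index), so invalidation drops exactly one attachment's resolve. In miniature, with hypothetical state:

uint32_t resolve = PIPE_CLEAR_COLOR0 | (PIPE_CLEAR_COLOR0 << 2);
resolve &= ~(PIPE_CLEAR_COLOR0 << 2);  /* glInvalidateFramebuffer on cbuf 2 */
/* only cbuf 0 will still be resolved from GMEM at flush time */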
static enum pipe_format
fd_resource_get_internal_format(struct pipe_resource *prsc)
{
- return fd_resource(prsc)->internal_format;
+ return fd_resource(prsc)->internal_format;
}
static void
fd_resource_set_stencil(struct pipe_resource *prsc,
- struct pipe_resource *stencil)
+ struct pipe_resource *stencil)
{
- fd_resource(prsc)->stencil = fd_resource(stencil);
+ fd_resource(prsc)->stencil = fd_resource(stencil);
}
static struct pipe_resource *
fd_resource_get_stencil(struct pipe_resource *prsc)
{
- struct fd_resource *rsc = fd_resource(prsc);
- if (rsc->stencil)
- return &rsc->stencil->b.b;
- return NULL;
+ struct fd_resource *rsc = fd_resource(prsc);
+ if (rsc->stencil)
+ return &rsc->stencil->b.b;
+ return NULL;
}
static const struct u_transfer_vtbl transfer_vtbl = {
- .resource_create = fd_resource_create,
- .resource_destroy = fd_resource_destroy,
- .transfer_map = fd_resource_transfer_map,
- .transfer_flush_region = fd_resource_transfer_flush_region,
- .transfer_unmap = fd_resource_transfer_unmap,
- .get_internal_format = fd_resource_get_internal_format,
- .set_stencil = fd_resource_set_stencil,
- .get_stencil = fd_resource_get_stencil,
+ .resource_create = fd_resource_create,
+ .resource_destroy = fd_resource_destroy,
+ .transfer_map = fd_resource_transfer_map,
+ .transfer_flush_region = fd_resource_transfer_flush_region,
+ .transfer_unmap = fd_resource_transfer_unmap,
+ .get_internal_format = fd_resource_get_internal_format,
+ .set_stencil = fd_resource_set_stencil,
+ .get_stencil = fd_resource_get_stencil,
};
static const uint64_t supported_modifiers[] = {
- DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_LINEAR,
};
static int
fd_layout_resource_for_modifier(struct fd_resource *rsc, uint64_t modifier)
{
- switch (modifier) {
- case DRM_FORMAT_MOD_LINEAR:
- /* The dri gallium frontend will pass DRM_FORMAT_MOD_INVALID to us
- * when it's called through any of the non-modifier BO create entry
- * points. Other drivers will determine tiling from the kernel or
- * other legacy backchannels, but for freedreno it just means
- * LINEAR. */
- case DRM_FORMAT_MOD_INVALID:
- return 0;
- default:
- return -1;
- }
+ switch (modifier) {
+ case DRM_FORMAT_MOD_LINEAR:
+ /* The dri gallium frontend will pass DRM_FORMAT_MOD_INVALID to us
+ * when it's called through any of the non-modifier BO create entry
+ * points. Other drivers will determine tiling from the kernel or
+ * other legacy backchannels, but for freedreno it just means
+ * LINEAR. */
+ case DRM_FORMAT_MOD_INVALID:
+ return 0;
+ default:
+ return -1;
+ }
}
static struct pipe_resource *
fd_resource_from_memobj(struct pipe_screen *pscreen,
- const struct pipe_resource *tmpl,
- struct pipe_memory_object *pmemobj,
- uint64_t offset)
+ const struct pipe_resource *tmpl,
+ struct pipe_memory_object *pmemobj, uint64_t offset)
{
- struct fd_screen *screen = fd_screen(pscreen);
- struct fd_memory_object *memobj = fd_memory_object(pmemobj);
- struct pipe_resource *prsc;
- struct fd_resource *rsc;
- uint32_t size;
- assert(memobj->bo);
-
- /* We shouldn't get a scanout buffer here. */
- assert(!(tmpl->bind & PIPE_BIND_SCANOUT));
-
- uint64_t modifiers = DRM_FORMAT_MOD_INVALID;
- if (tmpl->bind & PIPE_BIND_LINEAR) {
- modifiers = DRM_FORMAT_MOD_LINEAR;
- } else if (is_a6xx(screen) && tmpl->width0 >= FDL_MIN_UBWC_WIDTH) {
- modifiers = DRM_FORMAT_MOD_QCOM_COMPRESSED;
- }
-
- /* Allocate new pipe resource. */
- prsc = fd_resource_allocate_and_resolve(pscreen, tmpl, &modifiers, 1, &size);
- if (!prsc)
- return NULL;
- rsc = fd_resource(prsc);
- rsc->b.is_shared = true;
-
- /* bo's size has to be large enough, otherwise cleanup resource and fail
- * gracefully.
- */
- if (fd_bo_size(memobj->bo) < size) {
- fd_resource_destroy(pscreen, prsc);
- return NULL;
- }
-
- /* Share the bo with the memory object. */
- fd_resource_set_bo(rsc, fd_bo_ref(memobj->bo));
-
- return prsc;
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_memory_object *memobj = fd_memory_object(pmemobj);
+ struct pipe_resource *prsc;
+ struct fd_resource *rsc;
+ uint32_t size;
+ assert(memobj->bo);
+
+ /* We shouldn't get a scanout buffer here. */
+ assert(!(tmpl->bind & PIPE_BIND_SCANOUT));
+
+ uint64_t modifiers = DRM_FORMAT_MOD_INVALID;
+ if (tmpl->bind & PIPE_BIND_LINEAR) {
+ modifiers = DRM_FORMAT_MOD_LINEAR;
+ } else if (is_a6xx(screen) && tmpl->width0 >= FDL_MIN_UBWC_WIDTH) {
+ modifiers = DRM_FORMAT_MOD_QCOM_COMPRESSED;
+ }
+
+ /* Allocate new pipe resource. */
+ prsc = fd_resource_allocate_and_resolve(pscreen, tmpl, &modifiers, 1, &size);
+ if (!prsc)
+ return NULL;
+ rsc = fd_resource(prsc);
+ rsc->b.is_shared = true;
+
+ /* bo's size has to be large enough, otherwise cleanup resource and fail
+ * gracefully.
+ */
+ if (fd_bo_size(memobj->bo) < size) {
+ fd_resource_destroy(pscreen, prsc);
+ return NULL;
+ }
+
+ /* Share the bo with the memory object. */
+ fd_resource_set_bo(rsc, fd_bo_ref(memobj->bo));
+
+ return prsc;
}
static struct pipe_memory_object *
fd_memobj_create_from_handle(struct pipe_screen *pscreen,
- struct winsys_handle *whandle,
- bool dedicated)
+ struct winsys_handle *whandle, bool dedicated)
{
- struct fd_memory_object *memobj = CALLOC_STRUCT(fd_memory_object);
- if (!memobj)
- return NULL;
+ struct fd_memory_object *memobj = CALLOC_STRUCT(fd_memory_object);
+ if (!memobj)
+ return NULL;
- struct fd_bo *bo = fd_screen_bo_from_handle(pscreen, whandle);
- if (!bo) {
- free(memobj);
- return NULL;
- }
+ struct fd_bo *bo = fd_screen_bo_from_handle(pscreen, whandle);
+ if (!bo) {
+ free(memobj);
+ return NULL;
+ }
- memobj->b.dedicated = dedicated;
- memobj->bo = bo;
+ memobj->b.dedicated = dedicated;
+ memobj->bo = bo;
- return &memobj->b;
+ return &memobj->b;
}
static void
fd_memobj_destroy(struct pipe_screen *pscreen,
- struct pipe_memory_object *pmemobj)
+ struct pipe_memory_object *pmemobj)
{
- struct fd_memory_object *memobj = fd_memory_object(pmemobj);
+ struct fd_memory_object *memobj = fd_memory_object(pmemobj);
- assert(memobj->bo);
- fd_bo_del(memobj->bo);
+ assert(memobj->bo);
+ fd_bo_del(memobj->bo);
- free(pmemobj);
+ free(pmemobj);
}
void
fd_resource_screen_init(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
- bool fake_rgtc = screen->gpu_id < 400;
-
- pscreen->resource_create = u_transfer_helper_resource_create;
- /* NOTE: u_transfer_helper does not yet support the _with_modifiers()
- * variant:
- */
- pscreen->resource_create_with_modifiers = fd_resource_create_with_modifiers;
- pscreen->resource_from_handle = fd_resource_from_handle;
- pscreen->resource_get_handle = fd_resource_get_handle;
- pscreen->resource_destroy = u_transfer_helper_resource_destroy;
-
- pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl,
- true, false, fake_rgtc, true);
-
- if (!screen->layout_resource_for_modifier)
- screen->layout_resource_for_modifier = fd_layout_resource_for_modifier;
- if (!screen->supported_modifiers) {
- screen->supported_modifiers = supported_modifiers;
- screen->num_supported_modifiers = ARRAY_SIZE(supported_modifiers);
- }
-
- /* GL_EXT_memory_object */
- pscreen->memobj_create_from_handle = fd_memobj_create_from_handle;
- pscreen->memobj_destroy = fd_memobj_destroy;
- pscreen->resource_from_memobj = fd_resource_from_memobj;
+ struct fd_screen *screen = fd_screen(pscreen);
+ bool fake_rgtc = screen->gpu_id < 400;
+
+ pscreen->resource_create = u_transfer_helper_resource_create;
+ /* NOTE: u_transfer_helper does not yet support the _with_modifiers()
+ * variant:
+ */
+ pscreen->resource_create_with_modifiers = fd_resource_create_with_modifiers;
+ pscreen->resource_from_handle = fd_resource_from_handle;
+ pscreen->resource_get_handle = fd_resource_get_handle;
+ pscreen->resource_destroy = u_transfer_helper_resource_destroy;
+
+ pscreen->transfer_helper =
+ u_transfer_helper_create(&transfer_vtbl, true, false, fake_rgtc, true);
+
+ if (!screen->layout_resource_for_modifier)
+ screen->layout_resource_for_modifier = fd_layout_resource_for_modifier;
+ if (!screen->supported_modifiers) {
+ screen->supported_modifiers = supported_modifiers;
+ screen->num_supported_modifiers = ARRAY_SIZE(supported_modifiers);
+ }
+
+ /* GL_EXT_memory_object */
+ pscreen->memobj_create_from_handle = fd_memobj_create_from_handle;
+ pscreen->memobj_destroy = fd_memobj_destroy;
+ pscreen->resource_from_memobj = fd_resource_from_memobj;
}
static void
-fd_get_sample_position(struct pipe_context *context,
- unsigned sample_count, unsigned sample_index,
- float *pos_out)
+fd_get_sample_position(struct pipe_context *context, unsigned sample_count,
+ unsigned sample_index, float *pos_out)
{
- /* The following is copied from nouveau/nv50 except for position
- * values, which are taken from blob driver */
- static const uint8_t pos1[1][2] = { { 0x8, 0x8 } };
- static const uint8_t pos2[2][2] = {
- { 0xc, 0xc }, { 0x4, 0x4 } };
- static const uint8_t pos4[4][2] = {
- { 0x6, 0x2 }, { 0xe, 0x6 },
- { 0x2, 0xa }, { 0xa, 0xe } };
- /* TODO needs to be verified on supported hw */
- static const uint8_t pos8[8][2] = {
- { 0x9, 0x5 }, { 0x7, 0xb },
- { 0xd, 0x9 }, { 0x5, 0x3 },
- { 0x3, 0xd }, { 0x1, 0x7 },
- { 0xb, 0xf }, { 0xf, 0x1 } };
-
- const uint8_t (*ptr)[2];
-
- switch (sample_count) {
- case 1:
- ptr = pos1;
- break;
- case 2:
- ptr = pos2;
- break;
- case 4:
- ptr = pos4;
- break;
- case 8:
- ptr = pos8;
- break;
- default:
- assert(0);
- return;
- }
-
- pos_out[0] = ptr[sample_index][0] / 16.0f;
- pos_out[1] = ptr[sample_index][1] / 16.0f;
+ /* The following is copied from nouveau/nv50 except for position
+ * values, which are taken from blob driver */
+ static const uint8_t pos1[1][2] = {{0x8, 0x8}};
+ static const uint8_t pos2[2][2] = {{0xc, 0xc}, {0x4, 0x4}};
+ static const uint8_t pos4[4][2] = {{0x6, 0x2},
+ {0xe, 0x6},
+ {0x2, 0xa},
+ {0xa, 0xe}};
+ /* TODO needs to be verified on supported hw */
+ static const uint8_t pos8[8][2] = {{0x9, 0x5}, {0x7, 0xb}, {0xd, 0x9},
+ {0x5, 0x3}, {0x3, 0xd}, {0x1, 0x7},
+ {0xb, 0xf}, {0xf, 0x1}};
+
+ const uint8_t(*ptr)[2];
+
+ switch (sample_count) {
+ case 1:
+ ptr = pos1;
+ break;
+ case 2:
+ ptr = pos2;
+ break;
+ case 4:
+ ptr = pos4;
+ break;
+ case 8:
+ ptr = pos8;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ pos_out[0] = ptr[sample_index][0] / 16.0f;
+ pos_out[1] = ptr[sample_index][1] / 16.0f;
}
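The tables store 4-bit subpixel coordinates; dividing by 16 maps them into [0,1). For example, via the context hook (assuming a valid pctx):

float pos[2];
pctx->get_sample_position(pctx, 2 /* sample_count */, 0 /* index */, pos);
/* pos == { 0.75f, 0.75f }: sample 0 of 2x MSAA is {0xc, 0xc} / 16.0 */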
static void
-fd_blit_pipe(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
- in_dt
+fd_blit_pipe(struct pipe_context *pctx,
+ const struct pipe_blit_info *blit_info) in_dt
{
- /* wrap fd_blit to return void */
- fd_blit(pctx, blit_info);
+ /* wrap fd_blit to return void */
+ fd_blit(pctx, blit_info);
}
void
fd_resource_context_init(struct pipe_context *pctx)
{
- pctx->transfer_map = u_transfer_helper_transfer_map;
- pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
- pctx->transfer_unmap = u_transfer_helper_transfer_unmap;
- pctx->buffer_subdata = u_default_buffer_subdata;
- pctx->texture_subdata = u_default_texture_subdata;
- pctx->create_surface = fd_create_surface;
- pctx->surface_destroy = fd_surface_destroy;
- pctx->resource_copy_region = fd_resource_copy_region;
- pctx->blit = fd_blit_pipe;
- pctx->flush_resource = fd_flush_resource;
- pctx->invalidate_resource = fd_invalidate_resource;
- pctx->get_sample_position = fd_get_sample_position;
+ pctx->transfer_map = u_transfer_helper_transfer_map;
+ pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
+ pctx->transfer_unmap = u_transfer_helper_transfer_unmap;
+ pctx->buffer_subdata = u_default_buffer_subdata;
+ pctx->texture_subdata = u_default_texture_subdata;
+ pctx->create_surface = fd_create_surface;
+ pctx->surface_destroy = fd_surface_destroy;
+ pctx->resource_copy_region = fd_resource_copy_region;
+ pctx->blit = fd_blit_pipe;
+ pctx->flush_resource = fd_flush_resource;
+ pctx->invalidate_resource = fd_invalidate_resource;
+ pctx->get_sample_position = fd_get_sample_position;
}
#define FREEDRENO_RESOURCE_H_
#include "util/list.h"
+#include "util/simple_mtx.h"
#include "util/u_dump.h"
#include "util/u_range.h"
#include "util/u_transfer_helper.h"
-#include "util/simple_mtx.h"
+#include "freedreno/fdl/freedreno_layout.h"
#include "freedreno_batch.h"
#include "freedreno_util.h"
-#include "freedreno/fdl/freedreno_layout.h"
-
-
-#define PRSC_FMT \
- "p: target=%s, format=%s, %ux%ux%u, " \
- "array_size=%u, last_level=%u, " \
- "nr_samples=%u, usage=%u, bind=%x, flags=%x"
-#define PRSC_ARGS(p) \
- (p), util_str_tex_target((p)->target, true), util_format_short_name((p)->format), \
- (p)->width0, (p)->height0, (p)->depth0, (p)->array_size, (p)->last_level, \
- (p)->nr_samples, (p)->usage, (p)->bind, (p)->flags
+#define PRSC_FMT \
+ "p: target=%s, format=%s, %ux%ux%u, " \
+ "array_size=%u, last_level=%u, " \
+ "nr_samples=%u, usage=%u, bind=%x, flags=%x"
+#define PRSC_ARGS(p) \
+ (p), util_str_tex_target((p)->target, true), \
+ util_format_short_name((p)->format), (p)->width0, (p)->height0, \
+ (p)->depth0, (p)->array_size, (p)->last_level, (p)->nr_samples, \
+ (p)->usage, (p)->bind, (p)->flags
enum fd_lrz_direction {
- FD_LRZ_UNKNOWN,
- /* Depth func less/less-than: */
- FD_LRZ_LESS,
- /* Depth func greater/greater-than: */
- FD_LRZ_GREATER,
+ FD_LRZ_UNKNOWN,
+ /* Depth func less/less-than: */
+ FD_LRZ_LESS,
+ /* Depth func greater/greater-than: */
+ FD_LRZ_GREATER,
};
/**
* per-tile query results.
*/
struct fd_resource_tracking {
- struct pipe_reference reference;
-
- /* bitmask of in-flight batches which reference this resource. Note
- * that the batch doesn't hold reference to resources (but instead
- * the fd_ringbuffer holds refs to the underlying fd_bo), but in case
- * the resource is destroyed we need to clean up the batch's weak
- * references to us.
- */
- uint32_t batch_mask;
-
- /* reference to batch that writes this resource: */
- struct fd_batch *write_batch;
-
- /* Set of batches whose batch-cache key references this resource.
- * We need to track this to know which batch-cache entries to
- * invalidate if, for example, the resource is invalidated or
- * shadowed.
- */
- uint32_t bc_batch_mask;
+ struct pipe_reference reference;
+
+ /* bitmask of in-flight batches which reference this resource. Note
+ * that the batch doesn't hold reference to resources (but instead
+ * the fd_ringbuffer holds refs to the underlying fd_bo), but in case
+ * the resource is destroyed we need to clean up the batch's weak
+ * references to us.
+ */
+ uint32_t batch_mask;
+
+ /* reference to batch that writes this resource: */
+ struct fd_batch *write_batch;
+
+ /* Set of batches whose batch-cache key references this resource.
+ * We need to track this to know which batch-cache entries to
+ * invalidate if, for example, the resource is invalidated or
+ * shadowed.
+ */
+ uint32_t bc_batch_mask;
};
void __fd_resource_tracking_destroy(struct fd_resource_tracking *track);
static inline void
fd_resource_tracking_reference(struct fd_resource_tracking **ptr,
- struct fd_resource_tracking *track)
+ struct fd_resource_tracking *track)
{
- struct fd_resource_tracking *old_track = *ptr;
+ struct fd_resource_tracking *old_track = *ptr;
- if (pipe_reference(&(*ptr)->reference, &track->reference)) {
- assert(!old_track->write_batch);
- free(old_track);
- }
+ if (pipe_reference(&(*ptr)->reference, &track->reference)) {
+ assert(!old_track->write_batch);
+ free(old_track);
+ }
- *ptr = track;
+ *ptr = track;
}
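pipe_reference() returns true exactly when the old object's refcount drops to zero, which is what frees the tracking struct above. The destroy path earlier in this diff relies on that:

/* drops rsc's reference; frees rsc->track if it was the last one */
fd_resource_tracking_reference(&rsc->track, NULL);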
/**
* A resource (any buffer/texture/image/etc)
*/
struct fd_resource {
- struct threaded_resource b;
- struct fd_bo *bo; /* use fd_resource_set_bo() to write */
- enum pipe_format internal_format;
- struct fdl_layout layout;
-
- /* buffer range that has been initialized */
- struct util_range valid_buffer_range;
- bool valid;
- struct renderonly_scanout *scanout;
-
- /* reference to the resource holding stencil data for a z32_s8 texture */
- /* TODO rename to secondary or auxiliary? */
- struct fd_resource *stencil;
-
- struct fd_resource_tracking *track;
-
- simple_mtx_t lock;
-
- /* bitmask of state this resource could potentially dirty when rebound,
- * see rebind_resource()
- */
- enum fd_dirty_3d_state dirty;
-
- /* Sequence # incremented each time bo changes: */
- uint16_t seqno;
-
- /* Is this buffer a replacement created by threaded_context to avoid
- * a stall in PIPE_MAP_DISCARD_WHOLE_RESOURCE|PIPE_MAP_WRITE case?
- * If so, it no longer "owns" it's rsc->track, and so should not
- * invalidate when the rsc is destroyed.
- */
- bool is_replacement : 1;
-
- /* Uninitialized resources with UBWC format need their UBWC flag data
- * cleared before writes, as the UBWC state is read and used during
- * writes, so undefined UBWC flag data results in undefined results.
- */
- bool needs_ubwc_clear : 1;
-
- /*
- * LRZ
- *
- * TODO lrz width/height/pitch should probably also move to
- * fdl_layout
- */
- bool lrz_valid : 1;
- enum fd_lrz_direction lrz_direction : 2;
- uint16_t lrz_width; // for lrz clear, does this differ from lrz_pitch?
- uint16_t lrz_height;
- uint16_t lrz_pitch;
- struct fd_bo *lrz;
+ struct threaded_resource b;
+ struct fd_bo *bo; /* use fd_resource_set_bo() to write */
+ enum pipe_format internal_format;
+ struct fdl_layout layout;
+
+ /* buffer range that has been initialized */
+ struct util_range valid_buffer_range;
+ bool valid;
+ struct renderonly_scanout *scanout;
+
+ /* reference to the resource holding stencil data for a z32_s8 texture */
+ /* TODO rename to secondary or auxiliary? */
+ struct fd_resource *stencil;
+
+ struct fd_resource_tracking *track;
+
+ simple_mtx_t lock;
+
+ /* bitmask of state this resource could potentially dirty when rebound,
+ * see rebind_resource()
+ */
+ enum fd_dirty_3d_state dirty;
+
+ /* Sequence # incremented each time bo changes: */
+ uint16_t seqno;
+
+ /* Is this buffer a replacement created by threaded_context to avoid
+ * a stall in PIPE_MAP_DISCARD_WHOLE_RESOURCE|PIPE_MAP_WRITE case?
+ * If so, it no longer "owns" it's rsc->track, and so should not
+ * invalidate when the rsc is destroyed.
+ */
+ bool is_replacement : 1;
+
+ /* Uninitialized resources with UBWC format need their UBWC flag data
+ * cleared before writes, as the UBWC state is read and used during
+ * writes, so undefined UBWC flag data results in undefined results.
+ */
+ bool needs_ubwc_clear : 1;
+
+ /*
+ * LRZ
+ *
+ * TODO lrz width/height/pitch should probably also move to
+ * fdl_layout
+ */
+ bool lrz_valid : 1;
+ enum fd_lrz_direction lrz_direction : 2;
+ uint16_t lrz_width; // for lrz clear, does this differ from lrz_pitch?
+ uint16_t lrz_height;
+ uint16_t lrz_pitch;
+ struct fd_bo *lrz;
};
struct fd_memory_object {
- struct pipe_memory_object b;
- struct fd_bo *bo;
+ struct pipe_memory_object b;
+ struct fd_bo *bo;
};
static inline struct fd_resource *
fd_resource(struct pipe_resource *ptex)
{
- return (struct fd_resource *)ptex;
+ return (struct fd_resource *)ptex;
}
static inline const struct fd_resource *
fd_resource_const(const struct pipe_resource *ptex)
{
- return (const struct fd_resource *)ptex;
+ return (const struct fd_resource *)ptex;
}
static inline struct fd_memory_object *
-fd_memory_object (struct pipe_memory_object *pmemobj)
+fd_memory_object(struct pipe_memory_object *pmemobj)
{
- return (struct fd_memory_object *)pmemobj;
+ return (struct fd_memory_object *)pmemobj;
}
static inline bool
pending(struct fd_resource *rsc, bool write)
{
- /* if we have a pending GPU write, we are busy in any case: */
- if (rsc->track->write_batch)
- return true;
+ /* if we have a pending GPU write, we are busy in any case: */
+ if (rsc->track->write_batch)
+ return true;
- /* if CPU wants to write, but we are pending a GPU read, we are busy: */
- if (write && rsc->track->batch_mask)
- return true;
+ /* if CPU wants to write, but we are pending a GPU read, we are busy: */
+ if (write && rsc->track->batch_mask)
+ return true;
- if (rsc->stencil && pending(rsc->stencil, write))
- return true;
+ if (rsc->stencil && pending(rsc->stencil, write))
+ return true;
- return false;
+ return false;
}
static inline bool
fd_resource_busy(struct fd_resource *rsc, unsigned op)
{
- return fd_bo_cpu_prep(rsc->bo, NULL, op | DRM_FREEDRENO_PREP_NOSYNC) != 0;
+ return fd_bo_cpu_prep(rsc->bo, NULL, op | DRM_FREEDRENO_PREP_NOSYNC) != 0;
}
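DRM_FREEDRENO_PREP_NOSYNC turns the kernel wait into a poll, so this returns immediately. A hedged caller sketch deciding whether to stall or shadow:

if (fd_resource_busy(rsc, DRM_FREEDRENO_PREP_WRITE)) {
   /* GPU still owns the BO: prefer discard/shadow over a CPU stall */
} else {
   /* idle: safe to map and write directly */
}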
-int __fd_resource_wait(struct fd_context *ctx, struct fd_resource *rsc, unsigned op, const char *func);
-#define fd_resource_wait(ctx, rsc, op) __fd_resource_wait(ctx, rsc, op, __func__)
+int __fd_resource_wait(struct fd_context *ctx, struct fd_resource *rsc,
+ unsigned op, const char *func);
+#define fd_resource_wait(ctx, rsc, op) \
+ __fd_resource_wait(ctx, rsc, op, __func__)
static inline void
fd_resource_lock(struct fd_resource *rsc)
{
- simple_mtx_lock(&rsc->lock);
+ simple_mtx_lock(&rsc->lock);
}
static inline void
fd_resource_unlock(struct fd_resource *rsc)
{
- simple_mtx_unlock(&rsc->lock);
+ simple_mtx_unlock(&rsc->lock);
}
static inline void
fd_resource_set_usage(struct pipe_resource *prsc, enum fd_dirty_3d_state usage)
{
- if (!prsc)
- return;
- struct fd_resource *rsc = fd_resource(prsc);
- /* Bits are only ever ORed in, and we expect many set_usage() per
- * resource, so do the quick check outside of the lock.
- */
- if (likely(rsc->dirty & usage))
- return;
- fd_resource_lock(rsc);
- rsc->dirty |= usage;
- fd_resource_unlock(rsc);
+ if (!prsc)
+ return;
+ struct fd_resource *rsc = fd_resource(prsc);
+ /* Bits are only ever ORed in, and we expect many set_usage() per
+ * resource, so do the quick check outside of the lock.
+ */
+ if (likely(rsc->dirty & usage))
+ return;
+ fd_resource_lock(rsc);
+ rsc->dirty |= usage;
+ fd_resource_unlock(rsc);
}
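The unlocked peek above is a benign race: bits are only ever ORed in, so a stale read can at worst cause a redundant lock-and-set, never a lost update. The same pattern in standalone form (pthreads, hypothetical flags):

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static uint32_t flags;

static void
set_flag(uint32_t bit)
{
   if (flags & bit)            /* racy fast path, may be stale */
      return;
   pthread_mutex_lock(&m);
   flags |= bit;               /* idempotent, so the race is harmless */
   pthread_mutex_unlock(&m);
}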
static inline bool
has_depth(enum pipe_format format)
{
- const struct util_format_description *desc =
- util_format_description(format);
- return util_format_has_depth(desc);
+ const struct util_format_description *desc = util_format_description(format);
+ return util_format_has_depth(desc);
}
struct fd_transfer {
- struct threaded_transfer b;
- struct pipe_resource *staging_prsc;
- struct pipe_box staging_box;
+ struct threaded_transfer b;
+ struct pipe_resource *staging_prsc;
+ struct pipe_box staging_box;
};
static inline struct fd_transfer *
fd_transfer(struct pipe_transfer *ptrans)
{
- return (struct fd_transfer *)ptrans;
+ return (struct fd_transfer *)ptrans;
}
static inline struct fdl_slice *
fd_resource_slice(struct fd_resource *rsc, unsigned level)
{
- assert(level <= rsc->b.b.last_level);
- return &rsc->layout.slices[level];
+ assert(level <= rsc->b.b.last_level);
+ return &rsc->layout.slices[level];
}
static inline uint32_t
fd_resource_layer_stride(struct fd_resource *rsc, unsigned level)
{
- return fdl_layer_stride(&rsc->layout, level);
+ return fdl_layer_stride(&rsc->layout, level);
}
/* get pitch (in bytes) for specified mipmap level */
static inline uint32_t
fd_resource_pitch(struct fd_resource *rsc, unsigned level)
{
- if (is_a2xx(fd_screen(rsc->b.b.screen)))
- return fdl2_pitch(&rsc->layout, level);
+ if (is_a2xx(fd_screen(rsc->b.b.screen)))
+ return fdl2_pitch(&rsc->layout, level);
- return fdl_pitch(&rsc->layout, level);
+ return fdl_pitch(&rsc->layout, level);
}
/* get offset for specified mipmap level and texture/array layer */
static inline uint32_t
fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
{
- uint32_t offset = fdl_surface_offset(&rsc->layout, level, layer);
- debug_assert(offset < fd_bo_size(rsc->bo));
- return offset;
+ uint32_t offset = fdl_surface_offset(&rsc->layout, level, layer);
+ debug_assert(offset < fd_bo_size(rsc->bo));
+ return offset;
}
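Usage sketch for these layout helpers (hypothetical coordinates; fd_bo_map() from libdrm freedreno assumed): pitch and offset come from the shared fdl layout code, so CPU addressing matches what the GPU state setup uses:

unsigned x = 10, y = 7;                            /* texel in level 1    */
uint32_t pitch  = fd_resource_pitch(rsc, 1);       /* level 1 row stride  */
uint32_t offset = fd_resource_offset(rsc, 1, 0);   /* level 1, layer 0    */
uint8_t *texel = (uint8_t *)fd_bo_map(rsc->bo) + offset + y * pitch + x * 4;
texel[0] = 0xff;   /* e.g. first byte of a hypothetical RGBA8 texel */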
static inline uint32_t
fd_resource_ubwc_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
{
- uint32_t offset = fdl_ubwc_offset(&rsc->layout, level, layer);
- debug_assert(offset < fd_bo_size(rsc->bo));
- return offset;
+ uint32_t offset = fdl_ubwc_offset(&rsc->layout, level, layer);
+ debug_assert(offset < fd_bo_size(rsc->bo));
+ return offset;
}
/* This might be a5xx specific, but higher mipmap levels are always linear: */
static inline bool
fd_resource_level_linear(const struct pipe_resource *prsc, int level)
{
- struct fd_screen *screen = fd_screen(prsc->screen);
- debug_assert(!is_a3xx(screen));
+ struct fd_screen *screen = fd_screen(prsc->screen);
+ debug_assert(!is_a3xx(screen));
- return fdl_level_linear(&fd_resource_const(prsc)->layout, level);
+ return fdl_level_linear(&fd_resource_const(prsc)->layout, level);
}
static inline uint32_t
fd_resource_tile_mode(struct pipe_resource *prsc, int level)
{
- return fdl_tile_mode(&fd_resource(prsc)->layout, level);
+ return fdl_tile_mode(&fd_resource(prsc)->layout, level);
}
static inline bool
fd_resource_ubwc_enabled(struct fd_resource *rsc, int level)
{
- return fdl_ubwc_enabled(&rsc->layout, level);
+ return fdl_ubwc_enabled(&rsc->layout, level);
}
/* access # of samples, with 0 normalized to 1 (which is what we care about
 * most of the time)
 */
static inline unsigned
fd_resource_nr_samples(struct pipe_resource *prsc)
{
- return MAX2(1, prsc->nr_samples);
+ return MAX2(1, prsc->nr_samples);
}
void fd_resource_screen_init(struct pipe_screen *pscreen);
uint32_t fd_setup_slices(struct fd_resource *rsc);
void fd_resource_resize(struct pipe_resource *prsc, uint32_t sz);
-void fd_replace_buffer_storage(struct pipe_context *ctx, struct pipe_resource *dst,
- struct pipe_resource *src) in_dt;
-void fd_resource_uncompress(struct fd_context *ctx, struct fd_resource *rsc) assert_dt;
+void fd_replace_buffer_storage(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ struct pipe_resource *src) in_dt;
+void fd_resource_uncompress(struct fd_context *ctx,
+ struct fd_resource *rsc) assert_dt;
void fd_resource_dump(struct fd_resource *rsc, const char *name);
bool fd_render_condition_check(struct pipe_context *pctx) assert_dt;
static inline bool
fd_batch_references_resource(struct fd_batch *batch, struct fd_resource *rsc)
{
- return rsc->track->batch_mask & (1 << batch->idx);
+ return rsc->track->batch_mask & (1 << batch->idx);
}
static inline void
-fd_batch_write_prep(struct fd_batch *batch, struct fd_resource *rsc)
- assert_dt
+fd_batch_write_prep(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
{
- if (unlikely(rsc->needs_ubwc_clear)) {
- batch->ctx->clear_ubwc(batch, rsc);
- rsc->needs_ubwc_clear = false;
- }
+ if (unlikely(rsc->needs_ubwc_clear)) {
+ batch->ctx->clear_ubwc(batch, rsc);
+ rsc->needs_ubwc_clear = false;
+ }
}
static inline void
fd_batch_resource_read(struct fd_batch *batch,
- struct fd_resource *rsc)
- assert_dt
+ struct fd_resource *rsc) assert_dt
{
- /* Fast path: if we hit this then we know we don't have anyone else
- * writing to it (since both _write and _read flush other writers), and
- * that we've already recursed for stencil.
- */
- if (unlikely(!fd_batch_references_resource(batch, rsc)))
- fd_batch_resource_read_slowpath(batch, rsc);
+ /* Fast path: if we hit this then we know we don't have anyone else
+ * writing to it (since both _write and _read flush other writers), and
+ * that we've already recursed for stencil.
+ */
+ if (unlikely(!fd_batch_references_resource(batch, rsc)))
+ fd_batch_resource_read_slowpath(batch, rsc);
}
#endif /* FREEDRENO_RESOURCE_H_ */
* Rob Clark <robclark@freedesktop.org>
*/
-
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "util/format/u_format_s3tc.h"
+#include "util/u_debug.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
#include "util/u_screen.h"
#include "util/u_string.h"
-#include "util/u_debug.h"
#include "util/os_time.h"
-#include "drm-uapi/drm_fourcc.h"
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
+#include "drm-uapi/drm_fourcc.h"
#include <sys/sysinfo.h>
-#include "freedreno_screen.h"
-#include "freedreno_resource.h"
#include "freedreno_fence.h"
#include "freedreno_query.h"
+#include "freedreno_resource.h"
+#include "freedreno_screen.h"
#include "freedreno_util.h"
#include "a2xx/fd2_screen.h"
/* for fd_get_driver/device_uuid() */
#include "common/freedreno_uuid.h"
-#include "ir3/ir3_nir.h"
-#include "ir3/ir3_gallium.h"
#include "a2xx/ir2.h"
+#include "ir3/ir3_gallium.h"
+#include "ir3/ir3_nir.h"
/* clang-format off */
static const struct debug_named_value fd_debug_options[] = {
static const char *
fd_screen_get_name(struct pipe_screen *pscreen)
{
- static char buffer[128];
- snprintf(buffer, sizeof(buffer), "FD%03d",
- fd_screen(pscreen)->device_id);
- return buffer;
+ static char buffer[128];
+ snprintf(buffer, sizeof(buffer), "FD%03d", fd_screen(pscreen)->device_id);
+ return buffer;
}
static const char *
fd_screen_get_vendor(struct pipe_screen *pscreen)
{
- return "freedreno";
+ return "freedreno";
}
static const char *
fd_screen_get_device_vendor(struct pipe_screen *pscreen)
{
- return "Qualcomm";
+ return "Qualcomm";
}
-
static uint64_t
fd_screen_get_timestamp(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
-
- if (screen->has_timestamp) {
- uint64_t n;
- fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &n);
- debug_assert(screen->max_freq > 0);
- return n * 1000000000 / screen->max_freq;
- } else {
- int64_t cpu_time = os_time_get() * 1000;
- return cpu_time + screen->cpu_gpu_time_delta;
- }
-
+ struct fd_screen *screen = fd_screen(pscreen);
+
+ if (screen->has_timestamp) {
+ uint64_t n;
+ fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &n);
+ debug_assert(screen->max_freq > 0);
+ return n * 1000000000 / screen->max_freq;
+ } else {
+ int64_t cpu_time = os_time_get() * 1000;
+ return cpu_time + screen->cpu_gpu_time_delta;
+ }
}
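Worked example of the tick-to-nanosecond conversion above, assuming a hypothetical 19.2 MHz counter; multiplying before dividing in 64 bits preserves precision:

uint64_t max_freq = 19200000;                  /* ticks per second  */
uint64_t n = 96000;                            /* raw counter value */
uint64_t ns = n * 1000000000ull / max_freq;    /* == 5000000 ns     */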
static void
fd_screen_destroy(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- if (screen->pipe)
- fd_pipe_del(screen->pipe);
+ if (screen->pipe)
+ fd_pipe_del(screen->pipe);
- if (screen->dev)
- fd_device_del(screen->dev);
+ if (screen->dev)
+ fd_device_del(screen->dev);
- if (screen->ro)
- screen->ro->destroy(screen->ro);
+ if (screen->ro)
+ screen->ro->destroy(screen->ro);
- fd_bc_fini(&screen->batch_cache);
- fd_gmem_screen_fini(pscreen);
+ fd_bc_fini(&screen->batch_cache);
+ fd_gmem_screen_fini(pscreen);
- slab_destroy_parent(&screen->transfer_pool);
+ slab_destroy_parent(&screen->transfer_pool);
- simple_mtx_destroy(&screen->lock);
+ simple_mtx_destroy(&screen->lock);
- u_transfer_helper_destroy(pscreen->transfer_helper);
+ u_transfer_helper_destroy(pscreen->transfer_helper);
- if (screen->compiler)
- ir3_screen_fini(pscreen);
+ if (screen->compiler)
+ ir3_screen_fini(pscreen);
- ralloc_free(screen->live_batches);
+ ralloc_free(screen->live_batches);
- free(screen->perfcntr_queries);
- free(screen);
+ free(screen->perfcntr_queries);
+ free(screen);
}
/*
static int
fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
- struct fd_screen *screen = fd_screen(pscreen);
-
- /* this is probably not totally correct.. but it's a start: */
- switch (param) {
- /* Supported features (boolean caps). */
- case PIPE_CAP_NPOT_TEXTURES:
- case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
- case PIPE_CAP_ANISOTROPIC_FILTER:
- case PIPE_CAP_POINT_SPRITE:
- case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
- case PIPE_CAP_SEAMLESS_CUBE_MAP:
- case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
- case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
- case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
- case PIPE_CAP_STRING_MARKER:
- case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
- case PIPE_CAP_TEXTURE_BARRIER:
- case PIPE_CAP_INVALIDATE_BUFFER:
- case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:
- case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
- case PIPE_CAP_NIR_COMPACT_ARRAYS:
- return 1;
-
- case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
- return !is_a2xx(screen);
-
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
- return is_a2xx(screen);
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
- return !is_a2xx(screen);
-
- case PIPE_CAP_PACKED_UNIFORMS:
- return !is_a2xx(screen);
-
- case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
- case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
- return screen->has_robustness;
-
- case PIPE_CAP_VERTEXID_NOBASE:
- return is_a3xx(screen) || is_a4xx(screen);
-
- case PIPE_CAP_COMPUTE:
- return has_compute(screen);
-
- case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
- case PIPE_CAP_PCI_GROUP:
- case PIPE_CAP_PCI_BUS:
- case PIPE_CAP_PCI_DEVICE:
- case PIPE_CAP_PCI_FUNCTION:
- return 0;
-
- case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
- case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
- case PIPE_CAP_VERTEX_SHADER_SATURATE:
- case PIPE_CAP_PRIMITIVE_RESTART:
- case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
- case PIPE_CAP_TGSI_INSTANCEID:
- case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
- case PIPE_CAP_INDEP_BLEND_ENABLE:
- case PIPE_CAP_INDEP_BLEND_FUNC:
- case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
- case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
- case PIPE_CAP_CONDITIONAL_RENDER:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
- case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
- case PIPE_CAP_CLIP_HALFZ:
- return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);
-
- case PIPE_CAP_FAKE_SW_MSAA:
- return !fd_screen_get_param(pscreen, PIPE_CAP_TEXTURE_MULTISAMPLE);
-
- case PIPE_CAP_TEXTURE_MULTISAMPLE:
- return is_a5xx(screen) || is_a6xx(screen);
-
- case PIPE_CAP_SURFACE_SAMPLE_COUNT:
- return is_a6xx(screen);
-
- case PIPE_CAP_DEPTH_CLIP_DISABLE:
- return is_a3xx(screen) || is_a4xx(screen) || is_a6xx(screen);
-
- case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
- return is_a6xx(screen);
-
- case PIPE_CAP_POLYGON_OFFSET_CLAMP:
- return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);
-
- case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
- return 0;
-
- case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
- if (is_a3xx(screen)) return 16;
- if (is_a4xx(screen)) return 32;
- if (is_a5xx(screen) || is_a6xx(screen)) return 64;
- return 0;
- case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- /* We could possibly emulate more by pretending 2d/rect textures and
- * splitting high bits of index into 2nd dimension..
- */
- if (is_a3xx(screen)) return 8192;
- if (is_a4xx(screen)) return 16384;
-
- /* Note that the Vulkan blob on a540 and 640 report a
- * maxTexelBufferElements of just 65536 (the GLES3.2 and Vulkan
- * minimum).
- */
- if (is_a5xx(screen) || is_a6xx(screen)) return 1 << 27;
- return 0;
-
- case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
- case PIPE_CAP_CUBE_MAP_ARRAY:
- case PIPE_CAP_SAMPLER_VIEW_TARGET:
- case PIPE_CAP_TEXTURE_QUERY_LOD:
- return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);
-
- case PIPE_CAP_START_INSTANCE:
- /* Note that a5xx can do this, it just can't (at least with
- * current firmware) do draw_indirect with base_instance.
- * Since draw_indirect is needed sooner (gles31 and gl40 vs
- * gl42), hide base_instance on a5xx. :-/
- */
- return is_a4xx(screen);
-
- case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
- return is_a2xx(screen) ? 64 : 32;
-
- case PIPE_CAP_GLSL_FEATURE_LEVEL:
- case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
- if (is_a6xx(screen))
- return 330;
- else if (is_ir3(screen))
- return 140;
- else
- return 120;
-
- case PIPE_CAP_ESSL_FEATURE_LEVEL:
- /* we can probably enable 320 for a5xx too, but need to test: */
- if (is_a6xx(screen)) return 320;
- if (is_a5xx(screen)) return 310;
- if (is_ir3(screen)) return 300;
- return 120;
-
- case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
- if (is_a6xx(screen)) return 64;
- if (is_a5xx(screen)) return 4;
- return 0;
-
- case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
- if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen))
- return 4;
- return 0;
-
- /* TODO if we need this, do it in nir/ir3 backend to avoid breaking precompile: */
- case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
- return 0;
-
- case PIPE_CAP_FBFETCH:
- if (fd_device_version(screen->dev) >= FD_VERSION_GMEM_BASE &&
- is_a6xx(screen))
- return 1;
- return 0;
- case PIPE_CAP_SAMPLE_SHADING:
- if (is_a6xx(screen)) return 1;
- return 0;
-
- case PIPE_CAP_CONTEXT_PRIORITY_MASK:
- return screen->priority_mask;
-
- case PIPE_CAP_DRAW_INDIRECT:
- if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen))
- return 1;
- return 0;
-
- case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
- if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen))
- return 1;
- return 0;
-
- case PIPE_CAP_LOAD_CONSTBUF:
- /* name is confusing, but this turns on std430 packing */
- if (is_ir3(screen))
- return 1;
- return 0;
-
- case PIPE_CAP_NIR_IMAGES_AS_DEREF:
- return 0;
-
- case PIPE_CAP_MAX_VIEWPORTS:
- return 1;
-
- case PIPE_CAP_MAX_VARYINGS:
- return is_a6xx(screen) ? 31 : 16;
-
- case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
- /* We don't really have a limit on this, it all goes into the main
- * memory buffer. Needs to be at least 120 / 4 (minimum requirement
- * for GL_MAX_TESS_PATCH_COMPONENTS).
- */
- return 128;
-
- case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
- return 64 * 1024 * 1024;
-
- case PIPE_CAP_SHAREABLE_SHADERS:
- case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
- if (is_ir3(screen))
- return 1;
- return 0;
-
- /* Geometry shaders.. */
- case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
- return 512;
- case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
- return 2048;
- case PIPE_CAP_MAX_GS_INVOCATIONS:
- return 32;
-
- /* Only a2xx has the half-border clamp mode in HW, just have mesa/st lower
- * it for later HW.
- */
- case PIPE_CAP_GL_CLAMP:
- return is_a2xx(screen);
-
- case PIPE_CAP_CLIP_PLANES:
- /* On a3xx, there is HW support for GL user clip planes that
- * occasionally has to fall back to shader key-based lowering to clip
- * distances in the VS, and we don't support clip distances so that is
- * always shader-based lowering in the FS.
- *
- * On a4xx, there is no HW support for clip planes, so they are
- * always lowered to clip distances. We also lack SW support for the
- * HW's clip distances in HW, so we do shader-based lowering in the FS
- * in the driver backend.
- *
- * On a5xx-a6xx, we have the HW clip distances hooked up, so we just let
- * mesa/st lower desktop GL's clip planes to clip distances in the last
- * vertex shader stage.
- */
- return !is_a5xx(screen) && !is_a6xx(screen);
-
- /* Stream output. */
- case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
- if (is_ir3(screen))
- return PIPE_MAX_SO_BUFFERS;
- return 0;
- case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
- case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
- case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
- case PIPE_CAP_TGSI_TEXCOORD:
- if (is_ir3(screen))
- return 1;
- return 0;
- case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
- return 1;
- case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL:
- return is_a2xx(screen);
- case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
- case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
- if (is_ir3(screen))
- return 16 * 4; /* should only be shader out limit? */
- return 0;
-
- /* Texturing. */
- case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
- if (is_a6xx(screen) || is_a5xx(screen) || is_a4xx(screen))
- return 16384;
- else
- return 8192;
- case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- if (is_a6xx(screen) || is_a5xx(screen) || is_a4xx(screen))
- return 15;
- else
- return 14;
- case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- return 11;
-
- case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
- return (is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) ? 256 : 0;
-
- /* Render targets. */
- case PIPE_CAP_MAX_RENDER_TARGETS:
- return screen->max_rts;
- case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
- return (is_a3xx(screen) || is_a6xx(screen)) ? 1 : 0;
-
- /* Queries. */
- case PIPE_CAP_OCCLUSION_QUERY:
- return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);
- case PIPE_CAP_QUERY_TIMESTAMP:
- case PIPE_CAP_QUERY_TIME_ELAPSED:
- /* only a4xx, requires new enough kernel so we know max_freq: */
- return (screen->max_freq > 0) && (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen));
-
- case PIPE_CAP_VENDOR_ID:
- return 0x5143;
- case PIPE_CAP_DEVICE_ID:
- return 0xFFFFFFFF;
- case PIPE_CAP_ACCELERATED:
- return 1;
- case PIPE_CAP_VIDEO_MEMORY:
- DBG("FINISHME: The value returned is incorrect\n");
- return 10;
- case PIPE_CAP_UMA:
- return 1;
- case PIPE_CAP_MEMOBJ:
- return fd_device_version(screen->dev) >= FD_VERSION_MEMORY_FD;
- case PIPE_CAP_NATIVE_FENCE_FD:
- return fd_device_version(screen->dev) >= FD_VERSION_FENCE_FD;
- case PIPE_CAP_FENCE_SIGNAL:
- return screen->has_syncobj;
- case PIPE_CAP_CULL_DISTANCE:
- return is_a6xx(screen);
- case PIPE_CAP_SHADER_STENCIL_EXPORT:
- return is_a6xx(screen);
- case PIPE_CAP_TWO_SIDED_COLOR:
- return 0;
- default:
- return u_pipe_screen_get_param_defaults(pscreen, param);
- }
+ struct fd_screen *screen = fd_screen(pscreen);
+
+ /* this is probably not totally correct.. but it's a start: */
+ switch (param) {
+ /* Supported features (boolean caps). */
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ case PIPE_CAP_POINT_SPRITE:
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+ case PIPE_CAP_TEXTURE_BARRIER:
+ case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:
+ case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
+ case PIPE_CAP_NIR_COMPACT_ARRAYS:
+ return 1;
+
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ return !is_a2xx(screen);
+
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return is_a2xx(screen);
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return !is_a2xx(screen);
+
+ case PIPE_CAP_PACKED_UNIFORMS:
+ return !is_a2xx(screen);
+
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ return screen->has_robustness;
+
+ case PIPE_CAP_VERTEXID_NOBASE:
+ return is_a3xx(screen) || is_a4xx(screen);
+
+ case PIPE_CAP_COMPUTE:
+ return has_compute(screen);
+
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ case PIPE_CAP_PCI_GROUP:
+ case PIPE_CAP_PCI_BUS:
+ case PIPE_CAP_PCI_DEVICE:
+ case PIPE_CAP_PCI_FUNCTION:
+ return 0;
+
+ case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
+ case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
+ case PIPE_CAP_VERTEX_SHADER_SATURATE:
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ case PIPE_CAP_CLIP_HALFZ:
+ return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) ||
+ is_a6xx(screen);
+
+ case PIPE_CAP_FAKE_SW_MSAA:
+ return !fd_screen_get_param(pscreen, PIPE_CAP_TEXTURE_MULTISAMPLE);
+
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return is_a5xx(screen) || is_a6xx(screen);
+
+ case PIPE_CAP_SURFACE_SAMPLE_COUNT:
+ return is_a6xx(screen);
+
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ return is_a3xx(screen) || is_a4xx(screen) || is_a6xx(screen);
+
+ case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
+ return is_a6xx(screen);
+
+ case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);
+
+ case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
+ return 0;
+
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ if (is_a3xx(screen))
+ return 16;
+ if (is_a4xx(screen))
+ return 32;
+ if (is_a5xx(screen) || is_a6xx(screen))
+ return 64;
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ /* We could possibly emulate more by pretending 2d/rect textures and
+ * splitting high bits of index into 2nd dimension..
+ */
+ if (is_a3xx(screen))
+ return 8192;
+ if (is_a4xx(screen))
+ return 16384;
+
+      /* Note that the Vulkan blob on a540 and 640 reports a
+ * maxTexelBufferElements of just 65536 (the GLES3.2 and Vulkan
+ * minimum).
+ */
+ if (is_a5xx(screen) || is_a6xx(screen))
+ return 1 << 27;
+ return 0;
+
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
+ return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);
+
+ case PIPE_CAP_START_INSTANCE:
+ /* Note that a5xx can do this, it just can't (at least with
+ * current firmware) do draw_indirect with base_instance.
+ * Since draw_indirect is needed sooner (gles31 and gl40 vs
+ * gl42), hide base_instance on a5xx. :-/
+ */
+ return is_a4xx(screen);
+
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return is_a2xx(screen) ? 64 : 32;
+
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
+ if (is_a6xx(screen))
+ return 330;
+ else if (is_ir3(screen))
+ return 140;
+ else
+ return 120;
+
+ case PIPE_CAP_ESSL_FEATURE_LEVEL:
+ /* we can probably enable 320 for a5xx too, but need to test: */
+ if (is_a6xx(screen))
+ return 320;
+ if (is_a5xx(screen))
+ return 310;
+ if (is_ir3(screen))
+ return 300;
+ return 120;
+
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ if (is_a6xx(screen))
+ return 64;
+ if (is_a5xx(screen))
+ return 4;
+ return 0;
+
+ case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+ if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen))
+ return 4;
+ return 0;
+
+ /* TODO if we need this, do it in nir/ir3 backend to avoid breaking
+ * precompile: */
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ return 0;
+
+ case PIPE_CAP_FBFETCH:
+ if (fd_device_version(screen->dev) >= FD_VERSION_GMEM_BASE &&
+ is_a6xx(screen))
+ return 1;
+ return 0;
+ case PIPE_CAP_SAMPLE_SHADING:
+ if (is_a6xx(screen))
+ return 1;
+ return 0;
+
+ case PIPE_CAP_CONTEXT_PRIORITY_MASK:
+ return screen->priority_mask;
+
+ case PIPE_CAP_DRAW_INDIRECT:
+ if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen))
+ return 1;
+ return 0;
+
+ case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen))
+ return 1;
+ return 0;
+
+ case PIPE_CAP_LOAD_CONSTBUF:
+ /* name is confusing, but this turns on std430 packing */
+ if (is_ir3(screen))
+ return 1;
+ return 0;
+
+ case PIPE_CAP_NIR_IMAGES_AS_DEREF:
+ return 0;
+
+ case PIPE_CAP_MAX_VIEWPORTS:
+ return 1;
+
+ case PIPE_CAP_MAX_VARYINGS:
+ return is_a6xx(screen) ? 31 : 16;
+
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+    /* We don't really have a limit on this; it all goes into the main
+ * memory buffer. Needs to be at least 120 / 4 (minimum requirement
+ * for GL_MAX_TESS_PATCH_COMPONENTS).
+ */
+ return 128;
+
+ case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
+ return 64 * 1024 * 1024;
+
+ case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
+ if (is_ir3(screen))
+ return 1;
+ return 0;
+
+ /* Geometry shaders.. */
+ case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+ return 512;
+ case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+ return 2048;
+ case PIPE_CAP_MAX_GS_INVOCATIONS:
+ return 32;
+
+   /* Only a2xx has the half-border clamp mode in HW; just have mesa/st lower
+ * it for later HW.
+ */
+ case PIPE_CAP_GL_CLAMP:
+ return is_a2xx(screen);
+
+ case PIPE_CAP_CLIP_PLANES:
+ /* On a3xx, there is HW support for GL user clip planes that
+ * occasionally has to fall back to shader key-based lowering to clip
+ * distances in the VS, and we don't support clip distances so that is
+ * always shader-based lowering in the FS.
+ *
+ * On a4xx, there is no HW support for clip planes, so they are
+    * always lowered to clip distances. We also lack SW support for the
+    * HW's clip distances, so we do shader-based lowering in the FS in
+    * the driver backend.
+ *
+ * On a5xx-a6xx, we have the HW clip distances hooked up, so we just let
+ * mesa/st lower desktop GL's clip planes to clip distances in the last
+ * vertex shader stage.
+ */
+ return !is_a5xx(screen) && !is_a6xx(screen);
+
+ /* Stream output. */
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ if (is_ir3(screen))
+ return PIPE_MAX_SO_BUFFERS;
+ return 0;
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_TEXCOORD:
+ if (is_ir3(screen))
+ return 1;
+ return 0;
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ return 1;
+ case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL:
+ return is_a2xx(screen);
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ if (is_ir3(screen))
+ return 16 * 4; /* should only be shader out limit? */
+ return 0;
+
+ /* Texturing. */
+ case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
+ if (is_a6xx(screen) || is_a5xx(screen) || is_a4xx(screen))
+ return 16384;
+ else
+ return 8192;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ if (is_a6xx(screen) || is_a5xx(screen) || is_a4xx(screen))
+ return 15;
+ else
+ return 14;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 11;
+
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return (is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) ||
+ is_a6xx(screen))
+ ? 256
+ : 0;
+
+ /* Render targets. */
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return screen->max_rts;
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ return (is_a3xx(screen) || is_a6xx(screen)) ? 1 : 0;
+
+ /* Queries. */
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) ||
+ is_a6xx(screen);
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+      /* a4xx and newer; requires a new enough kernel so we know max_freq: */
+ return (screen->max_freq > 0) &&
+ (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen));
+
+ case PIPE_CAP_VENDOR_ID:
+ return 0x5143;
+ case PIPE_CAP_DEVICE_ID:
+ return 0xFFFFFFFF;
+ case PIPE_CAP_ACCELERATED:
+ return 1;
+ case PIPE_CAP_VIDEO_MEMORY:
+ DBG("FINISHME: The value returned is incorrect\n");
+ return 10;
+ case PIPE_CAP_UMA:
+ return 1;
+ case PIPE_CAP_MEMOBJ:
+ return fd_device_version(screen->dev) >= FD_VERSION_MEMORY_FD;
+ case PIPE_CAP_NATIVE_FENCE_FD:
+ return fd_device_version(screen->dev) >= FD_VERSION_FENCE_FD;
+ case PIPE_CAP_FENCE_SIGNAL:
+ return screen->has_syncobj;
+ case PIPE_CAP_CULL_DISTANCE:
+ return is_a6xx(screen);
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return is_a6xx(screen);
+ case PIPE_CAP_TWO_SIDED_COLOR:
+ return 0;
+ default:
+ return u_pipe_screen_get_param_defaults(pscreen, param);
+ }
}
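
For reference, the caps above are consumed through the get_param hook wired up in fd_screen_create() further below. A minimal sketch of how a frontend might probe a couple of them; the helper name is hypothetical, while the entry point and cap enums are the real gallium interface:

#include <stdio.h>

#include "pipe/p_screen.h"

/* Hypothetical helper: probe a couple of the caps advertised above. */
static void
print_some_caps(struct pipe_screen *pscreen)
{
   int max_rts = pscreen->get_param(pscreen, PIPE_CAP_MAX_RENDER_TARGETS);
   int msaa = pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_MULTISAMPLE);

   /* On a3xx/a4xx msaa is 0, in which case PIPE_CAP_FAKE_SW_MSAA is
    * advertised instead (see the cases above):
    */
   printf("max_rts=%d msaa=%d\n", max_rts, msaa);
}
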
static float
fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
{
- switch (param) {
- case PIPE_CAPF_MAX_LINE_WIDTH:
- case PIPE_CAPF_MAX_LINE_WIDTH_AA:
- /* NOTE: actual value is 127.0f, but this is working around a deqp
- * bug.. dEQP-GLES3.functional.rasterization.primitives.lines_wide
- * uses too small of a render target size, and gets confused when
- * the lines start going offscreen.
- *
- * See: https://code.google.com/p/android/issues/detail?id=206513
- */
- if (FD_DBG(DEQP))
- return 48.0f;
- return 127.0f;
- case PIPE_CAPF_MAX_POINT_WIDTH:
- case PIPE_CAPF_MAX_POINT_WIDTH_AA:
- return 4092.0f;
- case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
- return 16.0f;
- case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
- return 15.0f;
- case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
- case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
- case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
- return 0.0f;
- }
- mesa_loge("unknown paramf %d", param);
- return 0;
+ switch (param) {
+ case PIPE_CAPF_MAX_LINE_WIDTH:
+ case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+ /* NOTE: actual value is 127.0f, but this is working around a deqp
+ * bug.. dEQP-GLES3.functional.rasterization.primitives.lines_wide
+ * uses too small of a render target size, and gets confused when
+ * the lines start going offscreen.
+ *
+ * See: https://code.google.com/p/android/issues/detail?id=206513
+ */
+ if (FD_DBG(DEQP))
+ return 48.0f;
+ return 127.0f;
+ case PIPE_CAPF_MAX_POINT_WIDTH:
+ case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+ return 4092.0f;
+ case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+ return 16.0f;
+ case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+ return 15.0f;
+ case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
+ case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
+ case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
+ return 0.0f;
+ }
+ mesa_loge("unknown paramf %d", param);
+ return 0;
}
static int
fd_screen_get_shader_param(struct pipe_screen *pscreen,
- enum pipe_shader_type shader,
- enum pipe_shader_cap param)
+ enum pipe_shader_type shader,
+ enum pipe_shader_cap param)
{
- struct fd_screen *screen = fd_screen(pscreen);
-
- switch(shader)
- {
- case PIPE_SHADER_FRAGMENT:
- case PIPE_SHADER_VERTEX:
- break;
- case PIPE_SHADER_TESS_CTRL:
- case PIPE_SHADER_TESS_EVAL:
- case PIPE_SHADER_GEOMETRY:
- if (is_a6xx(screen))
- break;
- return 0;
- case PIPE_SHADER_COMPUTE:
- if (has_compute(screen))
- break;
- return 0;
- default:
- mesa_loge("unknown shader type %d", shader);
- return 0;
- }
-
- /* this is probably not totally correct.. but it's a start: */
- switch (param) {
- case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
- return 16384;
- case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
- return 8; /* XXX */
- case PIPE_SHADER_CAP_MAX_INPUTS:
- if (shader == PIPE_SHADER_GEOMETRY && is_a6xx(screen))
- return 16;
- return is_a6xx(screen) ? 32 : 16;
- case PIPE_SHADER_CAP_MAX_OUTPUTS:
- return is_a6xx(screen) ? 32 : 16;
- case PIPE_SHADER_CAP_MAX_TEMPS:
- return 64; /* Max native temporaries. */
- case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
- /* NOTE: seems to be limit for a3xx is actually 512 but
- * split between VS and FS. Use lower limit of 256 to
- * avoid getting into impossible situations:
- */
- return ((is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) ? 4096 : 64) * sizeof(float[4]);
- case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return is_ir3(screen) ? 16 : 1;
- case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
- return 1;
- case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
- case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
- /* Technically this should be the same as for TEMP/CONST, since
- * everything is just normal registers. This is just temporary
- * hack until load_input/store_output handle arrays in a similar
- * way as load_var/store_var..
- *
- * For tessellation stages, inputs are loaded using ldlw or ldg, both
- * of which support indirection.
- */
- return shader == PIPE_SHADER_TESS_CTRL || shader == PIPE_SHADER_TESS_EVAL;
- case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
- case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
- /* a2xx compiler doesn't handle indirect: */
- return is_ir3(screen) ? 1 : 0;
- case PIPE_SHADER_CAP_SUBROUTINES:
- case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
- case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
- case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
- case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
- case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
- case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
- case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
- case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
- case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
- return 0;
- case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
- return 1;
- case PIPE_SHADER_CAP_INTEGERS:
- return is_ir3(screen) ? 1 : 0;
- case PIPE_SHADER_CAP_INT64_ATOMICS:
- case PIPE_SHADER_CAP_FP16_DERIVATIVES:
- case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
- case PIPE_SHADER_CAP_INT16:
- case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
- return 0;
- case PIPE_SHADER_CAP_FP16:
- return ((is_a5xx(screen) || is_a6xx(screen)) &&
- (shader == PIPE_SHADER_COMPUTE ||
- shader == PIPE_SHADER_FRAGMENT) &&
- !FD_DBG(NOFP16));
- case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
- case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
- return 16;
- case PIPE_SHADER_CAP_PREFERRED_IR:
- return PIPE_SHADER_IR_NIR;
- case PIPE_SHADER_CAP_SUPPORTED_IRS:
- return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
- case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
- return 32;
- case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
- case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
- if (is_a5xx(screen) || is_a6xx(screen)) {
- /* a5xx (and a4xx for that matter) has one state-block
- * for compute-shader SSBO's and another that is shared
- * by VS/HS/DS/GS/FS.. so to simplify things for now
- * just advertise SSBOs for FS and CS. We could possibly
- * do what blob does, and partition the space for
- * VS/HS/DS/GS/FS. The blob advertises:
- *
- * GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: 4
- * GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS: 4
- * GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS: 4
- * GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS: 4
- * GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS: 4
- * GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS: 24
- * GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS: 24
- *
- * I think that way we could avoid having to patch shaders
- * for actual SSBO indexes by using a static partitioning.
- *
- * Note same state block is used for images and buffers,
- * but images also need texture state for read access
- * (isam/isam.3d)
- */
- switch(shader)
- {
- case PIPE_SHADER_FRAGMENT:
- case PIPE_SHADER_COMPUTE:
- return 24;
- default:
- return 0;
- }
- }
- return 0;
- }
- mesa_loge("unknown shader param %d", param);
- return 0;
+ struct fd_screen *screen = fd_screen(pscreen);
+
+ switch (shader) {
+ case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_VERTEX:
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ case PIPE_SHADER_TESS_EVAL:
+ case PIPE_SHADER_GEOMETRY:
+ if (is_a6xx(screen))
+ break;
+ return 0;
+ case PIPE_SHADER_COMPUTE:
+ if (has_compute(screen))
+ break;
+ return 0;
+ default:
+ mesa_loge("unknown shader type %d", shader);
+ return 0;
+ }
+
+ /* this is probably not totally correct.. but it's a start: */
+ switch (param) {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return 16384;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return 8; /* XXX */
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ if (shader == PIPE_SHADER_GEOMETRY && is_a6xx(screen))
+ return 16;
+ return is_a6xx(screen) ? 32 : 16;
+ case PIPE_SHADER_CAP_MAX_OUTPUTS:
+ return is_a6xx(screen) ? 32 : 16;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return 64; /* Max native temporaries. */
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+      /* NOTE: the limit for a3xx actually seems to be 512, but
+       * split between VS and FS. Use the lower limit of 256 to
+       * avoid getting into impossible situations:
+ */
+ return ((is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) ||
+ is_a6xx(screen))
+ ? 4096
+ : 64) *
+ sizeof(float[4]);
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return is_ir3(screen) ? 16 : 1;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ /* Technically this should be the same as for TEMP/CONST, since
+       * everything is just normal registers. This is just a temporary
+       * hack until load_input/store_output handle arrays in a similar
+ * way as load_var/store_var..
+ *
+ * For tessellation stages, inputs are loaded using ldlw or ldg, both
+ * of which support indirection.
+ */
+ return shader == PIPE_SHADER_TESS_CTRL || shader == PIPE_SHADER_TESS_EVAL;
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ /* a2xx compiler doesn't handle indirect: */
+ return is_ir3(screen) ? 1 : 0;
+ case PIPE_SHADER_CAP_SUBROUTINES:
+ case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+ case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+ case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+ case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+ case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
+ return 0;
+ case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+ return 1;
+ case PIPE_SHADER_CAP_INTEGERS:
+ return is_ir3(screen) ? 1 : 0;
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ case PIPE_SHADER_CAP_FP16_DERIVATIVES:
+ case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
+ case PIPE_SHADER_CAP_INT16:
+ case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
+ return 0;
+ case PIPE_SHADER_CAP_FP16:
+ return (
+ (is_a5xx(screen) || is_a6xx(screen)) &&
+ (shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT) &&
+ !FD_DBG(NOFP16));
+ case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+ case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+ return 16;
+ case PIPE_SHADER_CAP_PREFERRED_IR:
+ return PIPE_SHADER_IR_NIR;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+ if (is_a5xx(screen) || is_a6xx(screen)) {
+ /* a5xx (and a4xx for that matter) has one state-block
+       * for compute-shader SSBOs and another that is shared
+ * by VS/HS/DS/GS/FS.. so to simplify things for now
+ * just advertise SSBOs for FS and CS. We could possibly
+ * do what blob does, and partition the space for
+ * VS/HS/DS/GS/FS. The blob advertises:
+ *
+ * GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS: 24
+ * GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS: 24
+ *
+ * I think that way we could avoid having to patch shaders
+ * for actual SSBO indexes by using a static partitioning.
+ *
+ * Note same state block is used for images and buffers,
+ * but images also need texture state for read access
+ * (isam/isam.3d)
+ */
+ switch (shader) {
+ case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_COMPUTE:
+ return 24;
+ default:
+ return 0;
+ }
+ }
+ return 0;
+ }
+ mesa_loge("unknown shader param %d", param);
+ return 0;
}
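
The PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE expression above is easy to misread: the cap is in bytes, while the 4096/64 constants count vec4s. A quick self-check of the arithmetic, assuming nothing beyond <assert.h>:

#include <assert.h>

static void
check_const_buffer_math(void)
{
   /* sizeof(float[4]) is 16 bytes, so the ir3 limit (a3xx..a6xx) is
    * 4096 vec4 = 64 KiB, and the a2xx fallback is 64 vec4 = 1 KiB:
    */
   assert(4096 * sizeof(float[4]) == 64 * 1024);
   assert(64 * sizeof(float[4]) == 1024);
}
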
/* TODO depending on how much the limits differ for a3xx/a4xx, maybe move this
*/
static int
fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
- enum pipe_compute_cap param, void *ret)
+ enum pipe_compute_cap param, void *ret)
{
- struct fd_screen *screen = fd_screen(pscreen);
- const char * const ir = "ir3";
+ struct fd_screen *screen = fd_screen(pscreen);
+ const char *const ir = "ir3";
- if (!has_compute(screen))
- return 0;
+ if (!has_compute(screen))
+ return 0;
-#define RET(x) do { \
- if (ret) \
- memcpy(ret, x, sizeof(x)); \
- return sizeof(x); \
-} while (0)
+#define RET(x) \
+ do { \
+ if (ret) \
+ memcpy(ret, x, sizeof(x)); \
+ return sizeof(x); \
+ } while (0)
- switch (param) {
- case PIPE_COMPUTE_CAP_ADDRESS_BITS:
-// don't expose 64b pointer support yet, until ir3 supports 64b
-// math, otherwise spir64 target is used and we get 64b pointer
-// calculations that we can't do yet
-// if (is_a5xx(screen))
-// RET((uint32_t []){ 64 });
- RET((uint32_t []){ 32 });
+ switch (param) {
+ case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+ // don't expose 64b pointer support yet, until ir3 supports 64b
+ // math, otherwise spir64 target is used and we get 64b pointer
+ // calculations that we can't do yet
+ // if (is_a5xx(screen))
+ // RET((uint32_t []){ 64 });
+ RET((uint32_t[]){32});
- case PIPE_COMPUTE_CAP_IR_TARGET:
- if (ret)
- sprintf(ret, "%s", ir);
- return strlen(ir) * sizeof(char);
+ case PIPE_COMPUTE_CAP_IR_TARGET:
+ if (ret)
+ sprintf(ret, "%s", ir);
+ return strlen(ir) * sizeof(char);
- case PIPE_COMPUTE_CAP_GRID_DIMENSION:
- RET((uint64_t []) { 3 });
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ RET((uint64_t[]){3});
- case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
- RET(((uint64_t []) { 65535, 65535, 65535 }));
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ RET(((uint64_t[]){65535, 65535, 65535}));
- case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
- RET(((uint64_t []) { 1024, 1024, 64 }));
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ RET(((uint64_t[]){1024, 1024, 64}));
- case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
- RET((uint64_t []) { 1024 });
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ RET((uint64_t[]){1024});
- case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
- RET((uint64_t []) { screen->ram_size });
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+ RET((uint64_t[]){screen->ram_size});
- case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
- RET((uint64_t []) { 32768 });
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+ RET((uint64_t[]){32768});
- case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
- case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
- RET((uint64_t []) { 4096 });
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+ RET((uint64_t[]){4096});
- case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
- RET((uint64_t []) { screen->ram_size });
+ case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+ RET((uint64_t[]){screen->ram_size});
- case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
- RET((uint32_t []) { screen->max_freq / 1000000 });
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ RET((uint32_t[]){screen->max_freq / 1000000});
- case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
- RET((uint32_t []) { 9999 }); // TODO
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ RET((uint32_t[]){9999}); // TODO
- case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
- RET((uint32_t []) { 1 });
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ RET((uint32_t[]){1});
- case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
- RET((uint32_t []) { 32 }); // TODO
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ RET((uint32_t[]){32}); // TODO
- case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
- RET((uint64_t []) { 1024 }); // TODO
- }
+ case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+ RET((uint64_t[]){1024}); // TODO
+ }
- return 0;
+ return 0;
}
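
The RET() macro leans on C99 compound literals: sizeof(x) is the size of the whole anonymous array, so each case both copies the payload and reports its size. That supports the usual two-pass protocol, sketched here with a hypothetical helper; get_compute_param and the cap enum are the real gallium interface:

static uint64_t
query_max_threads_per_block(struct pipe_screen *pscreen)
{
   uint64_t val = 0;

   /* First pass with ret == NULL just returns sizeof(uint64_t[1]): */
   int sz = pscreen->get_compute_param(
      pscreen, PIPE_SHADER_IR_NIR,
      PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, NULL);

   /* Second pass fills in the value (1024 on parts with compute): */
   if (sz == sizeof(val))
      pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_NIR,
                                 PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
                                 &val);
   return val;
}
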
static const void *
-fd_get_compiler_options(struct pipe_screen *pscreen,
- enum pipe_shader_ir ir, unsigned shader)
+fd_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir,
+ unsigned shader)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- if (is_ir3(screen))
- return ir3_get_compiler_options(screen->compiler);
+ if (is_ir3(screen))
+ return ir3_get_compiler_options(screen->compiler);
- return ir2_get_compiler_options();
+ return ir2_get_compiler_options();
}
static struct disk_cache *
fd_get_disk_shader_cache(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- if (is_ir3(screen)) {
- struct ir3_compiler *compiler = screen->compiler;
- return compiler->disk_cache;
- }
+ if (is_ir3(screen)) {
+ struct ir3_compiler *compiler = screen->compiler;
+ return compiler->disk_cache;
+ }
- return NULL;
+ return NULL;
}
bool
-fd_screen_bo_get_handle(struct pipe_screen *pscreen,
- struct fd_bo *bo,
- struct renderonly_scanout *scanout,
- unsigned stride,
- struct winsys_handle *whandle)
+fd_screen_bo_get_handle(struct pipe_screen *pscreen, struct fd_bo *bo,
+ struct renderonly_scanout *scanout, unsigned stride,
+ struct winsys_handle *whandle)
{
- whandle->stride = stride;
-
- if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
- return fd_bo_get_name(bo, &whandle->handle) == 0;
- } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
- if (renderonly_get_handle(scanout, whandle))
- return true;
- whandle->handle = fd_bo_handle(bo);
- return true;
- } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
- whandle->handle = fd_bo_dmabuf(bo);
- return true;
- } else {
- return false;
- }
+ whandle->stride = stride;
+
+ if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
+ return fd_bo_get_name(bo, &whandle->handle) == 0;
+ } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
+ if (renderonly_get_handle(scanout, whandle))
+ return true;
+ whandle->handle = fd_bo_handle(bo);
+ return true;
+ } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
+ whandle->handle = fd_bo_dmabuf(bo);
+ return true;
+ } else {
+ return false;
+ }
}
static void
fd_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen,
- enum pipe_format format,
- int max, uint64_t *modifiers,
- unsigned int *external_only,
- int *count)
+ enum pipe_format format, int max,
+ uint64_t *modifiers,
+ unsigned int *external_only, int *count)
{
- struct fd_screen *screen = fd_screen(pscreen);
- int i, num = 0;
+ struct fd_screen *screen = fd_screen(pscreen);
+ int i, num = 0;
- max = MIN2(max, screen->num_supported_modifiers);
+ max = MIN2(max, screen->num_supported_modifiers);
- if (!max) {
- max = screen->num_supported_modifiers;
- external_only = NULL;
- modifiers = NULL;
- }
+ if (!max) {
+ max = screen->num_supported_modifiers;
+ external_only = NULL;
+ modifiers = NULL;
+ }
- for (i = 0; i < max; i++) {
- if (modifiers)
- modifiers[num] = screen->supported_modifiers[i];
+ for (i = 0; i < max; i++) {
+ if (modifiers)
+ modifiers[num] = screen->supported_modifiers[i];
- if (external_only)
- external_only[num] = 0;
+ if (external_only)
+ external_only[num] = 0;
- num++;
- }
+ num++;
+ }
- *count = num;
+ *count = num;
}
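
Note the max == 0 special case above: it turns the function into a count query, enabling the usual two-pass pattern. A minimal sketch of the caller side, with a hypothetical helper and error handling elided:

#include <stdlib.h>

static uint64_t *
get_supported_modifiers(struct pipe_screen *pscreen, enum pipe_format fmt,
                        int *count)
{
   /* First pass: max == 0, only *count is written: */
   pscreen->query_dmabuf_modifiers(pscreen, fmt, 0, NULL, NULL, count);

   /* Second pass: fetch the actual modifier list: */
   uint64_t *mods = malloc(*count * sizeof(uint64_t));
   pscreen->query_dmabuf_modifiers(pscreen, fmt, *count, mods, NULL, count);
   return mods;
}
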
static bool
fd_screen_is_dmabuf_modifier_supported(struct pipe_screen *pscreen,
- uint64_t modifier,
- enum pipe_format format,
- bool *external_only)
+ uint64_t modifier,
+ enum pipe_format format,
+ bool *external_only)
{
- struct fd_screen *screen = fd_screen(pscreen);
- int i;
+ struct fd_screen *screen = fd_screen(pscreen);
+ int i;
- for (i = 0; i < screen->num_supported_modifiers; i++) {
- if (modifier == screen->supported_modifiers[i]) {
- if (external_only)
- *external_only = false;
+ for (i = 0; i < screen->num_supported_modifiers; i++) {
+ if (modifier == screen->supported_modifiers[i]) {
+ if (external_only)
+ *external_only = false;
- return true;
- }
- }
+ return true;
+ }
+ }
- return false;
+ return false;
}
struct fd_bo *
fd_screen_bo_from_handle(struct pipe_screen *pscreen,
- struct winsys_handle *whandle)
+ struct winsys_handle *whandle)
{
- struct fd_screen *screen = fd_screen(pscreen);
- struct fd_bo *bo;
-
- if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
- bo = fd_bo_from_name(screen->dev, whandle->handle);
- } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
- bo = fd_bo_from_handle(screen->dev, whandle->handle, 0);
- } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
- bo = fd_bo_from_dmabuf(screen->dev, whandle->handle);
- } else {
- DBG("Attempt to import unsupported handle type %d", whandle->type);
- return NULL;
- }
-
- if (!bo) {
- DBG("ref name 0x%08x failed", whandle->handle);
- return NULL;
- }
-
- return bo;
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_bo *bo;
+
+ if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
+ bo = fd_bo_from_name(screen->dev, whandle->handle);
+ } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
+ bo = fd_bo_from_handle(screen->dev, whandle->handle, 0);
+ } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
+ bo = fd_bo_from_dmabuf(screen->dev, whandle->handle);
+ } else {
+ DBG("Attempt to import unsupported handle type %d", whandle->type);
+ return NULL;
+ }
+
+ if (!bo) {
+ DBG("ref name 0x%08x failed", whandle->handle);
+ return NULL;
+ }
+
+ return bo;
}
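
Only the whandle fields consumed above matter on import. For example, a dma-buf fd received from another process could be wrapped as follows; the helper is hypothetical, the types are those used in this file:

static struct fd_bo *
import_dmabuf_fd(struct pipe_screen *pscreen, int dmabuf_fd)
{
   struct winsys_handle whandle = {
      .type = WINSYS_HANDLE_TYPE_FD,
      .handle = dmabuf_fd,
   };
   return fd_screen_bo_from_handle(pscreen, &whandle);
}
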
-static void _fd_fence_ref(struct pipe_screen *pscreen,
- struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *pfence)
+static void
+_fd_fence_ref(struct pipe_screen *pscreen, struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *pfence)
{
- fd_fence_ref(ptr, pfence);
+ fd_fence_ref(ptr, pfence);
}
static void
fd_screen_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- fd_get_device_uuid(uuid, screen->gpu_id);
+ fd_get_device_uuid(uuid, screen->gpu_id);
}
static void
fd_screen_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
{
- fd_get_driver_uuid(uuid);
+ fd_get_driver_uuid(uuid);
}
struct pipe_screen *
fd_screen_create(struct fd_device *dev, struct renderonly *ro)
{
- struct fd_screen *screen = CALLOC_STRUCT(fd_screen);
- struct pipe_screen *pscreen;
- uint64_t val;
-
- fd_mesa_debug = debug_get_option_fd_mesa_debug();
-
- if (FD_DBG(NOBIN))
- fd_binning_enabled = false;
-
- if (!screen)
- return NULL;
-
- pscreen = &screen->base;
-
- screen->dev = dev;
- screen->ro = ro;
- screen->refcnt = 1;
-
- // maybe this should be in context?
- screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D);
- if (!screen->pipe) {
- DBG("could not create 3d pipe");
- goto fail;
- }
-
- if (fd_pipe_get_param(screen->pipe, FD_GMEM_SIZE, &val)) {
- DBG("could not get GMEM size");
- goto fail;
- }
- screen->gmemsize_bytes = env_var_as_unsigned("FD_MESA_GMEM", val);
-
- if (fd_device_version(dev) >= FD_VERSION_GMEM_BASE) {
- fd_pipe_get_param(screen->pipe, FD_GMEM_BASE, &screen->gmem_base);
- }
-
- if (fd_pipe_get_param(screen->pipe, FD_DEVICE_ID, &val)) {
- DBG("could not get device-id");
- goto fail;
- }
- screen->device_id = val;
-
- if (fd_pipe_get_param(screen->pipe, FD_MAX_FREQ, &val)) {
- DBG("could not get gpu freq");
- /* this limits what performance related queries are
- * supported but is not fatal
- */
- screen->max_freq = 0;
- } else {
- screen->max_freq = val;
- if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0)
- screen->has_timestamp = true;
- }
-
- if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {
- DBG("could not get gpu-id");
- goto fail;
- }
- screen->gpu_id = val;
-
- if (fd_pipe_get_param(screen->pipe, FD_CHIP_ID, &val)) {
- DBG("could not get chip-id");
- /* older kernels may not have this property: */
- unsigned core = screen->gpu_id / 100;
- unsigned major = (screen->gpu_id % 100) / 10;
- unsigned minor = screen->gpu_id % 10;
- unsigned patch = 0; /* assume the worst */
- val = (patch & 0xff) | ((minor & 0xff) << 8) |
- ((major & 0xff) << 16) | ((core & 0xff) << 24);
- }
- screen->chip_id = val;
-
- if (fd_pipe_get_param(screen->pipe, FD_NR_RINGS, &val)) {
- DBG("could not get # of rings");
- screen->priority_mask = 0;
- } else {
- /* # of rings equates to number of unique priority values: */
- screen->priority_mask = (1 << val) - 1;
- }
-
- if (fd_device_version(dev) >= FD_VERSION_ROBUSTNESS)
- screen->has_robustness = true;
-
- screen->has_syncobj = fd_has_syncobj(screen->dev);
-
- struct sysinfo si;
- sysinfo(&si);
- screen->ram_size = si.totalram;
-
- DBG("Pipe Info:");
- DBG(" GPU-id: %d", screen->gpu_id);
- DBG(" Chip-id: 0x%08x", screen->chip_id);
- DBG(" GMEM size: 0x%08x", screen->gmemsize_bytes);
-
- /* explicitly checking for GPU revisions that are known to work. This
- * may be overly conservative for a3xx, where spoofing the gpu_id with
- * the blob driver seems to generate identical cmdstream dumps. But
- * on a2xx, there seem to be small differences between the GPU revs
- * so it is probably better to actually test first on real hardware
- * before enabling:
- *
- * If you have a different adreno version, feel free to add it to one
- * of the cases below and see what happens. And if it works, please
- * send a patch ;-)
- */
- switch (screen->gpu_id) {
- case 200:
- case 201:
- case 205:
- case 220:
- fd2_screen_init(pscreen);
- break;
- case 305:
- case 307:
- case 320:
- case 330:
- fd3_screen_init(pscreen);
- break;
- case 405:
- case 420:
- case 430:
- fd4_screen_init(pscreen);
- break;
- case 510:
- case 530:
- case 540:
- fd5_screen_init(pscreen);
- break;
- case 618:
- case 630:
- case 640:
- case 650:
- fd6_screen_init(pscreen);
- break;
- default:
- mesa_loge("unsupported GPU: a%03d", screen->gpu_id);
- goto fail;
- }
-
- freedreno_dev_info_init(&screen->info, screen->gpu_id);
-
- if (FD_DBG(PERFC)) {
- screen->perfcntr_groups = fd_perfcntrs(screen->gpu_id,
- &screen->num_perfcntr_groups);
- }
-
- /* NOTE: don't enable if we have too old of a kernel to support
- * growable cmdstream buffers, since memory requirement for cmdstream
- * buffers would be too much otherwise.
- */
- if (fd_device_version(dev) >= FD_VERSION_UNLIMITED_CMDS)
- screen->reorder = !FD_DBG(INORDER);
-
- if (BATCH_DEBUG)
- screen->live_batches = _mesa_pointer_set_create(NULL);
-
- fd_bc_init(&screen->batch_cache);
-
- list_inithead(&screen->context_list);
-
- (void) simple_mtx_init(&screen->lock, mtx_plain);
-
- pscreen->destroy = fd_screen_destroy;
- pscreen->get_param = fd_screen_get_param;
- pscreen->get_paramf = fd_screen_get_paramf;
- pscreen->get_shader_param = fd_screen_get_shader_param;
- pscreen->get_compute_param = fd_get_compute_param;
- pscreen->get_compiler_options = fd_get_compiler_options;
- pscreen->get_disk_shader_cache = fd_get_disk_shader_cache;
-
- fd_resource_screen_init(pscreen);
- fd_query_screen_init(pscreen);
- fd_gmem_screen_init(pscreen);
-
- pscreen->get_name = fd_screen_get_name;
- pscreen->get_vendor = fd_screen_get_vendor;
- pscreen->get_device_vendor = fd_screen_get_device_vendor;
-
- pscreen->get_timestamp = fd_screen_get_timestamp;
-
- pscreen->fence_reference = _fd_fence_ref;
- pscreen->fence_finish = fd_fence_finish;
- pscreen->fence_get_fd = fd_fence_get_fd;
-
- pscreen->query_dmabuf_modifiers = fd_screen_query_dmabuf_modifiers;
- pscreen->is_dmabuf_modifier_supported = fd_screen_is_dmabuf_modifier_supported;
-
- pscreen->get_device_uuid = fd_screen_get_device_uuid;
- pscreen->get_driver_uuid = fd_screen_get_driver_uuid;
-
- slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16);
-
- return pscreen;
+ struct fd_screen *screen = CALLOC_STRUCT(fd_screen);
+ struct pipe_screen *pscreen;
+ uint64_t val;
+
+ fd_mesa_debug = debug_get_option_fd_mesa_debug();
+
+ if (FD_DBG(NOBIN))
+ fd_binning_enabled = false;
+
+ if (!screen)
+ return NULL;
+
+ pscreen = &screen->base;
+
+ screen->dev = dev;
+ screen->ro = ro;
+ screen->refcnt = 1;
+
+ // maybe this should be in context?
+ screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D);
+ if (!screen->pipe) {
+ DBG("could not create 3d pipe");
+ goto fail;
+ }
+
+ if (fd_pipe_get_param(screen->pipe, FD_GMEM_SIZE, &val)) {
+ DBG("could not get GMEM size");
+ goto fail;
+ }
+ screen->gmemsize_bytes = env_var_as_unsigned("FD_MESA_GMEM", val);
+
+ if (fd_device_version(dev) >= FD_VERSION_GMEM_BASE) {
+ fd_pipe_get_param(screen->pipe, FD_GMEM_BASE, &screen->gmem_base);
+ }
+
+ if (fd_pipe_get_param(screen->pipe, FD_DEVICE_ID, &val)) {
+ DBG("could not get device-id");
+ goto fail;
+ }
+ screen->device_id = val;
+
+ if (fd_pipe_get_param(screen->pipe, FD_MAX_FREQ, &val)) {
+ DBG("could not get gpu freq");
+      /* this limits what performance-related queries are
+ * supported but is not fatal
+ */
+ screen->max_freq = 0;
+ } else {
+ screen->max_freq = val;
+ if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0)
+ screen->has_timestamp = true;
+ }
+
+ if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {
+ DBG("could not get gpu-id");
+ goto fail;
+ }
+ screen->gpu_id = val;
+
+ if (fd_pipe_get_param(screen->pipe, FD_CHIP_ID, &val)) {
+ DBG("could not get chip-id");
+ /* older kernels may not have this property: */
+ unsigned core = screen->gpu_id / 100;
+ unsigned major = (screen->gpu_id % 100) / 10;
+ unsigned minor = screen->gpu_id % 10;
+ unsigned patch = 0; /* assume the worst */
+ val = (patch & 0xff) | ((minor & 0xff) << 8) | ((major & 0xff) << 16) |
+ ((core & 0xff) << 24);
+ }
+ screen->chip_id = val;
+
+ if (fd_pipe_get_param(screen->pipe, FD_NR_RINGS, &val)) {
+ DBG("could not get # of rings");
+ screen->priority_mask = 0;
+ } else {
+ /* # of rings equates to number of unique priority values: */
+ screen->priority_mask = (1 << val) - 1;
+ }
+
+ if (fd_device_version(dev) >= FD_VERSION_ROBUSTNESS)
+ screen->has_robustness = true;
+
+ screen->has_syncobj = fd_has_syncobj(screen->dev);
+
+ struct sysinfo si;
+ sysinfo(&si);
+ screen->ram_size = si.totalram;
+
+ DBG("Pipe Info:");
+ DBG(" GPU-id: %d", screen->gpu_id);
+ DBG(" Chip-id: 0x%08x", screen->chip_id);
+ DBG(" GMEM size: 0x%08x", screen->gmemsize_bytes);
+
+ /* explicitly checking for GPU revisions that are known to work. This
+ * may be overly conservative for a3xx, where spoofing the gpu_id with
+ * the blob driver seems to generate identical cmdstream dumps. But
+ * on a2xx, there seem to be small differences between the GPU revs
+ * so it is probably better to actually test first on real hardware
+ * before enabling:
+ *
+ * If you have a different adreno version, feel free to add it to one
+ * of the cases below and see what happens. And if it works, please
+ * send a patch ;-)
+ */
+ switch (screen->gpu_id) {
+ case 200:
+ case 201:
+ case 205:
+ case 220:
+ fd2_screen_init(pscreen);
+ break;
+ case 305:
+ case 307:
+ case 320:
+ case 330:
+ fd3_screen_init(pscreen);
+ break;
+ case 405:
+ case 420:
+ case 430:
+ fd4_screen_init(pscreen);
+ break;
+ case 510:
+ case 530:
+ case 540:
+ fd5_screen_init(pscreen);
+ break;
+ case 618:
+ case 630:
+ case 640:
+ case 650:
+ fd6_screen_init(pscreen);
+ break;
+ default:
+ mesa_loge("unsupported GPU: a%03d", screen->gpu_id);
+ goto fail;
+ }
+
+ freedreno_dev_info_init(&screen->info, screen->gpu_id);
+
+ if (FD_DBG(PERFC)) {
+ screen->perfcntr_groups =
+ fd_perfcntrs(screen->gpu_id, &screen->num_perfcntr_groups);
+ }
+
+ /* NOTE: don't enable if we have too old of a kernel to support
+    * growable cmdstream buffers, since the memory requirement for
+    * cmdstream buffers would be too high otherwise.
+ */
+ if (fd_device_version(dev) >= FD_VERSION_UNLIMITED_CMDS)
+ screen->reorder = !FD_DBG(INORDER);
+
+ if (BATCH_DEBUG)
+ screen->live_batches = _mesa_pointer_set_create(NULL);
+
+ fd_bc_init(&screen->batch_cache);
+
+ list_inithead(&screen->context_list);
+
+ (void)simple_mtx_init(&screen->lock, mtx_plain);
+
+ pscreen->destroy = fd_screen_destroy;
+ pscreen->get_param = fd_screen_get_param;
+ pscreen->get_paramf = fd_screen_get_paramf;
+ pscreen->get_shader_param = fd_screen_get_shader_param;
+ pscreen->get_compute_param = fd_get_compute_param;
+ pscreen->get_compiler_options = fd_get_compiler_options;
+ pscreen->get_disk_shader_cache = fd_get_disk_shader_cache;
+
+ fd_resource_screen_init(pscreen);
+ fd_query_screen_init(pscreen);
+ fd_gmem_screen_init(pscreen);
+
+ pscreen->get_name = fd_screen_get_name;
+ pscreen->get_vendor = fd_screen_get_vendor;
+ pscreen->get_device_vendor = fd_screen_get_device_vendor;
+
+ pscreen->get_timestamp = fd_screen_get_timestamp;
+
+ pscreen->fence_reference = _fd_fence_ref;
+ pscreen->fence_finish = fd_fence_finish;
+ pscreen->fence_get_fd = fd_fence_get_fd;
+
+ pscreen->query_dmabuf_modifiers = fd_screen_query_dmabuf_modifiers;
+ pscreen->is_dmabuf_modifier_supported =
+ fd_screen_is_dmabuf_modifier_supported;
+
+ pscreen->get_device_uuid = fd_screen_get_device_uuid;
+ pscreen->get_driver_uuid = fd_screen_get_driver_uuid;
+
+ slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16);
+
+ return pscreen;
fail:
- fd_screen_destroy(pscreen);
- return NULL;
+ fd_screen_destroy(pscreen);
+ return NULL;
}
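
Two fallback calculations in fd_screen_create() deserve a worked example: the FD_CHIP_ID fallback packs the gpu_id digits into the coreid:8 majorrev:8 minorrev:8 patch:8 layout, and FD_NR_RINGS maps directly to a priority bitmask. A small self-check, using gpu_id 630 purely for illustration:

#include <assert.h>
#include <stdint.h>

static void
check_screen_fallback_math(void)
{
   uint32_t gpu_id = 630;
   unsigned core = gpu_id / 100;         /* 6 */
   unsigned major = (gpu_id % 100) / 10; /* 3 */
   unsigned minor = gpu_id % 10;         /* 0 */
   unsigned patch = 0;                   /* assume the worst */

   uint32_t chip_id = (patch & 0xff) | ((minor & 0xff) << 8) |
                      ((major & 0xff) << 16) | ((core & 0xff) << 24);
   assert(chip_id == 0x06030000);

   /* likewise, a kernel reporting FD_NR_RINGS == 3 yields three
    * distinct context priorities:
    */
   assert(((1 << 3) - 1) == 0x7);
}
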
#ifndef FREEDRENO_SCREEN_H_
#define FREEDRENO_SCREEN_H_
+#include "common/freedreno_dev_info.h"
#include "drm/freedreno_drmif.h"
#include "drm/freedreno_ringbuffer.h"
#include "perfcntrs/freedreno_perfcntr.h"
-#include "common/freedreno_dev_info.h"
#include "pipe/p_screen.h"
+#include "renderonly/renderonly.h"
#include "util/debug.h"
+#include "util/simple_mtx.h"
+#include "util/slab.h"
#include "util/u_memory.h"
#include "util/u_queue.h"
-#include "util/slab.h"
-#include "util/simple_mtx.h"
-#include "renderonly/renderonly.h"
#include "freedreno_batch_cache.h"
#include "freedreno_gmem.h"
* generation backend can override this with screen->gmem_reason_mask
*/
enum fd_gmem_reason {
- FD_GMEM_CLEARS_DEPTH_STENCIL = BIT(0),
- FD_GMEM_DEPTH_ENABLED = BIT(1),
- FD_GMEM_STENCIL_ENABLED = BIT(2),
- FD_GMEM_BLEND_ENABLED = BIT(3),
- FD_GMEM_LOGICOP_ENABLED = BIT(4),
- FD_GMEM_FB_READ = BIT(5),
+ FD_GMEM_CLEARS_DEPTH_STENCIL = BIT(0),
+ FD_GMEM_DEPTH_ENABLED = BIT(1),
+ FD_GMEM_STENCIL_ENABLED = BIT(2),
+ FD_GMEM_BLEND_ENABLED = BIT(3),
+ FD_GMEM_LOGICOP_ENABLED = BIT(4),
+ FD_GMEM_FB_READ = BIT(5),
};
struct fd_screen {
- struct pipe_screen base;
+ struct pipe_screen base;
- struct list_head context_list;
+ struct list_head context_list;
- simple_mtx_t lock;
+ simple_mtx_t lock;
- /* it would be tempting to use pipe_reference here, but that
- * really doesn't work well if it isn't the first member of
- * the struct, so not quite so awesome to be adding refcnting
- * further down the inheritance hierarchy:
- */
- int refcnt;
+ /* it would be tempting to use pipe_reference here, but that
+ * really doesn't work well if it isn't the first member of
+ * the struct, so not quite so awesome to be adding refcnting
+ * further down the inheritance hierarchy:
+ */
+ int refcnt;
- /* place for winsys to stash it's own stuff: */
- void *winsys_priv;
+   /* place for winsys to stash its own stuff: */
+ void *winsys_priv;
- struct slab_parent_pool transfer_pool;
+ struct slab_parent_pool transfer_pool;
- uint64_t gmem_base;
- uint32_t gmemsize_bytes;
- uint32_t device_id;
- uint32_t gpu_id; /* 220, 305, etc */
- uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */
- uint32_t max_freq;
- uint32_t ram_size;
- uint32_t max_rts; /* max # of render targets */
- uint32_t priority_mask;
- bool has_timestamp;
- bool has_robustness;
- bool has_syncobj;
+ uint64_t gmem_base;
+ uint32_t gmemsize_bytes;
+ uint32_t device_id;
+ uint32_t gpu_id; /* 220, 305, etc */
+ uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */
+ uint32_t max_freq;
+ uint32_t ram_size;
+ uint32_t max_rts; /* max # of render targets */
+ uint32_t priority_mask;
+ bool has_timestamp;
+ bool has_robustness;
+ bool has_syncobj;
- struct freedreno_dev_info info;
+ struct freedreno_dev_info info;
- /* Bitmask of gmem_reasons that do not force GMEM path over bypass
- * for current generation.
- */
- enum fd_gmem_reason gmem_reason_mask;
+ /* Bitmask of gmem_reasons that do not force GMEM path over bypass
+ * for current generation.
+ */
+ enum fd_gmem_reason gmem_reason_mask;
- unsigned num_perfcntr_groups;
- const struct fd_perfcntr_group *perfcntr_groups;
+ unsigned num_perfcntr_groups;
+ const struct fd_perfcntr_group *perfcntr_groups;
- /* generated at startup from the perfcntr groups: */
- unsigned num_perfcntr_queries;
- struct pipe_driver_query_info *perfcntr_queries;
+ /* generated at startup from the perfcntr groups: */
+ unsigned num_perfcntr_queries;
+ struct pipe_driver_query_info *perfcntr_queries;
- void *compiler; /* currently unused for a2xx */
- struct util_queue compile_queue; /* currently unused for a2xx */
+ void *compiler; /* currently unused for a2xx */
+ struct util_queue compile_queue; /* currently unused for a2xx */
- struct fd_device *dev;
+ struct fd_device *dev;
- /* NOTE: we still need a pipe associated with the screen in a few
- * places, like screen->get_timestamp(). For anything context
- * related, use ctx->pipe instead.
- */
- struct fd_pipe *pipe;
+ /* NOTE: we still need a pipe associated with the screen in a few
+ * places, like screen->get_timestamp(). For anything context
+ * related, use ctx->pipe instead.
+ */
+ struct fd_pipe *pipe;
- uint32_t (*setup_slices)(struct fd_resource *rsc);
- unsigned (*tile_mode)(const struct pipe_resource *prsc);
- int (*layout_resource_for_modifier)(struct fd_resource *rsc, uint64_t modifier);
+ uint32_t (*setup_slices)(struct fd_resource *rsc);
+ unsigned (*tile_mode)(const struct pipe_resource *prsc);
+ int (*layout_resource_for_modifier)(struct fd_resource *rsc,
+ uint64_t modifier);
- /* indirect-branch emit: */
- void (*emit_ib)(struct fd_ringbuffer *ring, struct fd_ringbuffer *target);
+ /* indirect-branch emit: */
+ void (*emit_ib)(struct fd_ringbuffer *ring, struct fd_ringbuffer *target);
- /* simple gpu "memcpy": */
- void (*mem_to_mem)(struct fd_ringbuffer *ring, struct pipe_resource *dst,
- unsigned dst_off, struct pipe_resource *src, unsigned src_off,
- unsigned sizedwords);
+ /* simple gpu "memcpy": */
+ void (*mem_to_mem)(struct fd_ringbuffer *ring, struct pipe_resource *dst,
+ unsigned dst_off, struct pipe_resource *src,
+ unsigned src_off, unsigned sizedwords);
- int64_t cpu_gpu_time_delta;
+ int64_t cpu_gpu_time_delta;
- struct fd_batch_cache batch_cache;
- struct fd_gmem_cache gmem_cache;
+ struct fd_batch_cache batch_cache;
+ struct fd_gmem_cache gmem_cache;
- bool reorder;
+ bool reorder;
- uint16_t rsc_seqno;
- uint16_t ctx_seqno;
+ uint16_t rsc_seqno;
+ uint16_t ctx_seqno;
- unsigned num_supported_modifiers;
- const uint64_t *supported_modifiers;
+ unsigned num_supported_modifiers;
+ const uint64_t *supported_modifiers;
- struct renderonly *ro;
+ struct renderonly *ro;
- /* when BATCH_DEBUG is enabled, tracking for fd_batch's which are not yet
- * freed:
- */
- struct set *live_batches;
+ /* when BATCH_DEBUG is enabled, tracking for fd_batch's which are not yet
+ * freed:
+ */
+ struct set *live_batches;
};
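
The gmem_reason_mask field above inverts the usual sense of a bitmask: set bits mark reasons that do not force the GMEM path. A hypothetical predicate in that spirit, not the driver's actual flush logic:

#include <stdbool.h>

static inline bool
forces_gmem(const struct fd_screen *screen, enum fd_gmem_reason reasons)
{
   /* only reasons *outside* the per-generation mask force GMEM: */
   return (reasons & ~screen->gmem_reason_mask) != 0;
}
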
static inline struct fd_screen *
fd_screen(struct pipe_screen *pscreen)
{
- return (struct fd_screen *)pscreen;
+ return (struct fd_screen *)pscreen;
}
static inline void
fd_screen_lock(struct fd_screen *screen)
{
- simple_mtx_lock(&screen->lock);
+ simple_mtx_lock(&screen->lock);
}
static inline void
fd_screen_unlock(struct fd_screen *screen)
{
- simple_mtx_unlock(&screen->lock);
+ simple_mtx_unlock(&screen->lock);
}
static inline void
fd_screen_assert_locked(struct fd_screen *screen)
{
- simple_mtx_assert_locked(&screen->lock);
+ simple_mtx_assert_locked(&screen->lock);
}
-bool fd_screen_bo_get_handle(struct pipe_screen *pscreen,
- struct fd_bo *bo,
- struct renderonly_scanout *scanout,
- unsigned stride,
- struct winsys_handle *whandle);
-struct fd_bo * fd_screen_bo_from_handle(struct pipe_screen *pscreen,
- struct winsys_handle *whandle);
+bool fd_screen_bo_get_handle(struct pipe_screen *pscreen, struct fd_bo *bo,
+ struct renderonly_scanout *scanout,
+ unsigned stride, struct winsys_handle *whandle);
+struct fd_bo *fd_screen_bo_from_handle(struct pipe_screen *pscreen,
+ struct winsys_handle *whandle);
-struct pipe_screen *
-fd_screen_create(struct fd_device *dev, struct renderonly *ro);
+struct pipe_screen *fd_screen_create(struct fd_device *dev,
+ struct renderonly *ro);
static inline boolean
is_a20x(struct fd_screen *screen)
{
- return (screen->gpu_id >= 200) && (screen->gpu_id < 210);
+ return (screen->gpu_id >= 200) && (screen->gpu_id < 210);
}
static inline boolean
is_a2xx(struct fd_screen *screen)
{
- return (screen->gpu_id >= 200) && (screen->gpu_id < 300);
+ return (screen->gpu_id >= 200) && (screen->gpu_id < 300);
}
/* is a3xx patch revision 0? */
static inline boolean
is_a3xx_p0(struct fd_screen *screen)
{
- return (screen->chip_id & 0xff0000ff) == 0x03000000;
+ return (screen->chip_id & 0xff0000ff) == 0x03000000;
}
static inline boolean
is_a3xx(struct fd_screen *screen)
{
- return (screen->gpu_id >= 300) && (screen->gpu_id < 400);
+ return (screen->gpu_id >= 300) && (screen->gpu_id < 400);
}
static inline boolean
is_a4xx(struct fd_screen *screen)
{
- return (screen->gpu_id >= 400) && (screen->gpu_id < 500);
+ return (screen->gpu_id >= 400) && (screen->gpu_id < 500);
}
static inline boolean
is_a5xx(struct fd_screen *screen)
{
- return (screen->gpu_id >= 500) && (screen->gpu_id < 600);
+ return (screen->gpu_id >= 500) && (screen->gpu_id < 600);
}
static inline boolean
is_a6xx(struct fd_screen *screen)
{
- return (screen->gpu_id >= 600) && (screen->gpu_id < 700);
+ return (screen->gpu_id >= 600) && (screen->gpu_id < 700);
}
static inline boolean
is_a650(struct fd_screen *screen)
{
- return screen->gpu_id == 650;
+ return screen->gpu_id == 650;
}
/* is it using the ir3 compiler (shader isa introduced with a3xx)? */
static inline boolean
is_ir3(struct fd_screen *screen)
{
- return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);
+ return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) ||
+ is_a6xx(screen);
}
static inline bool
has_compute(struct fd_screen *screen)
{
- return is_a5xx(screen) || is_a6xx(screen);
+ return is_a5xx(screen) || is_a6xx(screen);
}
#endif /* FREEDRENO_SCREEN_H_ */
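/* A minimal standalone sketch (not from this patch) of how the gpu_id
 * range checks above classify devices; the ids used here are examples:
 */
#include <stdbool.h>
#include <stdio.h>

static bool model_is_a2xx(unsigned gpu_id) { return (gpu_id >= 200) && (gpu_id < 300); }
static bool model_is_ir3(unsigned gpu_id) { return (gpu_id >= 300) && (gpu_id < 700); }

int
main(void)
{
   const unsigned ids[] = {220, 330, 650};
   for (unsigned i = 0; i < 3; i++)
      printf("gpu_id=%u: a2xx=%d ir3=%d\n", ids[i],
             (int)model_is_a2xx(ids[i]), (int)model_is_ir3(ids[i]));
   return 0;
}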
#include "pipe/p_state.h"
#include "util/u_dual_blend.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
#include "util/u_helpers.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "freedreno_state.h"
#include "freedreno_context.h"
-#include "freedreno_resource.h"
-#include "freedreno_texture.h"
#include "freedreno_gmem.h"
#include "freedreno_query_hw.h"
+#include "freedreno_resource.h"
+#include "freedreno_state.h"
+#include "freedreno_texture.h"
#include "freedreno_util.h"
/* All the generic state handling.. In case of CSO's that are specific
 * to the GPU version, when the bind and the delete are common they can
 * be in here.
 */
static void
-update_draw_cost(struct fd_context *ctx)
- assert_dt
+update_draw_cost(struct fd_context *ctx) assert_dt
{
- struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-
- ctx->draw_cost = pfb->nr_cbufs;
- for (unsigned i = 0; i < pfb->nr_cbufs; i++)
- if (fd_blend_enabled(ctx, i))
- ctx->draw_cost++;
- if (fd_depth_enabled(ctx))
- ctx->draw_cost++;
- if (fd_depth_write_enabled(ctx))
- ctx->draw_cost++;
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+
+ ctx->draw_cost = pfb->nr_cbufs;
+ for (unsigned i = 0; i < pfb->nr_cbufs; i++)
+ if (fd_blend_enabled(ctx, i))
+ ctx->draw_cost++;
+ if (fd_depth_enabled(ctx))
+ ctx->draw_cost++;
+ if (fd_depth_write_enabled(ctx))
+ ctx->draw_cost++;
}
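/* A worked example of the heuristic above, with hypothetical state: two
 * color buffers, blending enabled on one of them, and depth test plus
 * depth write both on gives 2 (cbufs) + 1 (blend) + 1 (depth test) +
 * 1 (depth write) = a draw_cost of 5.
 */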
static void
fd_set_blend_color(struct pipe_context *pctx,
- const struct pipe_blend_color *blend_color)
- in_dt
+ const struct pipe_blend_color *blend_color) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->blend_color = *blend_color;
- fd_context_dirty(ctx, FD_DIRTY_BLEND_COLOR);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->blend_color = *blend_color;
+ fd_context_dirty(ctx, FD_DIRTY_BLEND_COLOR);
}
static void
fd_set_stencil_ref(struct pipe_context *pctx,
- const struct pipe_stencil_ref stencil_ref)
- in_dt
+ const struct pipe_stencil_ref stencil_ref) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->stencil_ref = stencil_ref;
- fd_context_dirty(ctx, FD_DIRTY_STENCIL_REF);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->stencil_ref = stencil_ref;
+ fd_context_dirty(ctx, FD_DIRTY_STENCIL_REF);
}
static void
fd_set_clip_state(struct pipe_context *pctx,
- const struct pipe_clip_state *clip)
- in_dt
+ const struct pipe_clip_state *clip) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->ucp = *clip;
- fd_context_dirty(ctx, FD_DIRTY_UCP);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->ucp = *clip;
+ fd_context_dirty(ctx, FD_DIRTY_UCP);
}
static void
-fd_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
- in_dt
+fd_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->sample_mask = (uint16_t)sample_mask;
- fd_context_dirty(ctx, FD_DIRTY_SAMPLE_MASK);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->sample_mask = (uint16_t)sample_mask;
+ fd_context_dirty(ctx, FD_DIRTY_SAMPLE_MASK);
}
static void
-fd_set_min_samples(struct pipe_context *pctx, unsigned min_samples)
- in_dt
+fd_set_min_samples(struct pipe_context *pctx, unsigned min_samples) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->min_samples = min_samples;
- fd_context_dirty(ctx, FD_DIRTY_MIN_SAMPLES);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->min_samples = min_samples;
+ fd_context_dirty(ctx, FD_DIRTY_MIN_SAMPLES);
}
/* notes from calim on #dri-devel:
* index>0 will be UBO's.. well, I'll worry about that later
*/
static void
-fd_set_constant_buffer(struct pipe_context *pctx,
- enum pipe_shader_type shader, uint index,
- bool take_ownership,
- const struct pipe_constant_buffer *cb)
- in_dt
+fd_set_constant_buffer(struct pipe_context *pctx, enum pipe_shader_type shader,
+ uint index, bool take_ownership,
+ const struct pipe_constant_buffer *cb) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_constbuf_stateobj *so = &ctx->constbuf[shader];
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_constbuf_stateobj *so = &ctx->constbuf[shader];
- util_copy_constant_buffer(&so->cb[index], cb, take_ownership);
+ util_copy_constant_buffer(&so->cb[index], cb, take_ownership);
- /* Note that gallium frontends can unbind constant buffers by
- * passing NULL here.
- */
- if (unlikely(!cb)) {
- so->enabled_mask &= ~(1 << index);
- return;
- }
+ /* Note that gallium frontends can unbind constant buffers by
+ * passing NULL here.
+ */
+ if (unlikely(!cb)) {
+ so->enabled_mask &= ~(1 << index);
+ return;
+ }
- so->enabled_mask |= 1 << index;
+ so->enabled_mask |= 1 << index;
- fd_context_dirty_shader(ctx, shader, FD_DIRTY_SHADER_CONST);
- fd_resource_set_usage(cb->buffer, FD_DIRTY_CONST);
+ fd_context_dirty_shader(ctx, shader, FD_DIRTY_SHADER_CONST);
+ fd_resource_set_usage(cb->buffer, FD_DIRTY_CONST);
- if (index > 0) {
- assert(!cb->user_buffer);
- ctx->dirty |= FD_DIRTY_RESOURCE;
- }
+ if (index > 0) {
+ assert(!cb->user_buffer);
+ ctx->dirty |= FD_DIRTY_RESOURCE;
+ }
}
static void
-fd_set_shader_buffers(struct pipe_context *pctx,
- enum pipe_shader_type shader,
- unsigned start, unsigned count,
- const struct pipe_shader_buffer *buffers,
- unsigned writable_bitmask)
- in_dt
+fd_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader,
+ unsigned start, unsigned count,
+ const struct pipe_shader_buffer *buffers,
+ unsigned writable_bitmask) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_shaderbuf_stateobj *so = &ctx->shaderbuf[shader];
- const unsigned modified_bits = u_bit_consecutive(start, count);
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_shaderbuf_stateobj *so = &ctx->shaderbuf[shader];
+ const unsigned modified_bits = u_bit_consecutive(start, count);
- so->enabled_mask &= ~modified_bits;
- so->writable_mask &= ~modified_bits;
- so->writable_mask |= writable_bitmask << start;
+ so->enabled_mask &= ~modified_bits;
+ so->writable_mask &= ~modified_bits;
+ so->writable_mask |= writable_bitmask << start;
- for (unsigned i = 0; i < count; i++) {
- unsigned n = i + start;
- struct pipe_shader_buffer *buf = &so->sb[n];
+ for (unsigned i = 0; i < count; i++) {
+ unsigned n = i + start;
+ struct pipe_shader_buffer *buf = &so->sb[n];
- if (buffers && buffers[i].buffer) {
- if ((buf->buffer == buffers[i].buffer) &&
- (buf->buffer_offset == buffers[i].buffer_offset) &&
- (buf->buffer_size == buffers[i].buffer_size))
- continue;
+ if (buffers && buffers[i].buffer) {
+ if ((buf->buffer == buffers[i].buffer) &&
+ (buf->buffer_offset == buffers[i].buffer_offset) &&
+ (buf->buffer_size == buffers[i].buffer_size))
+ continue;
- buf->buffer_offset = buffers[i].buffer_offset;
- buf->buffer_size = buffers[i].buffer_size;
- pipe_resource_reference(&buf->buffer, buffers[i].buffer);
+ buf->buffer_offset = buffers[i].buffer_offset;
+ buf->buffer_size = buffers[i].buffer_size;
+ pipe_resource_reference(&buf->buffer, buffers[i].buffer);
- fd_resource_set_usage(buffers[i].buffer, FD_DIRTY_SSBO);
+ fd_resource_set_usage(buffers[i].buffer, FD_DIRTY_SSBO);
- so->enabled_mask |= BIT(n);
- } else {
- pipe_resource_reference(&buf->buffer, NULL);
- }
- }
+ so->enabled_mask |= BIT(n);
+ } else {
+ pipe_resource_reference(&buf->buffer, NULL);
+ }
+ }
- fd_context_dirty_shader(ctx, shader, FD_DIRTY_SHADER_SSBO);
+ fd_context_dirty_shader(ctx, shader, FD_DIRTY_SHADER_SSBO);
}
void
-fd_set_shader_images(struct pipe_context *pctx,
- enum pipe_shader_type shader,
- unsigned start, unsigned count,
- unsigned unbind_num_trailing_slots,
- const struct pipe_image_view *images)
- in_dt
+fd_set_shader_images(struct pipe_context *pctx, enum pipe_shader_type shader,
+ unsigned start, unsigned count,
+ unsigned unbind_num_trailing_slots,
+ const struct pipe_image_view *images) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
- unsigned mask = 0;
+ unsigned mask = 0;
- if (images) {
- for (unsigned i = 0; i < count; i++) {
- unsigned n = i + start;
- struct pipe_image_view *buf = &so->si[n];
+ if (images) {
+ for (unsigned i = 0; i < count; i++) {
+ unsigned n = i + start;
+ struct pipe_image_view *buf = &so->si[n];
- if ((buf->resource == images[i].resource) &&
- (buf->format == images[i].format) &&
- (buf->access == images[i].access) &&
- !memcmp(&buf->u, &images[i].u, sizeof(buf->u)))
- continue;
+ if ((buf->resource == images[i].resource) &&
+ (buf->format == images[i].format) &&
+ (buf->access == images[i].access) &&
+ !memcmp(&buf->u, &images[i].u, sizeof(buf->u)))
+ continue;
- mask |= BIT(n);
- util_copy_image_view(buf, &images[i]);
+ mask |= BIT(n);
+ util_copy_image_view(buf, &images[i]);
- if (buf->resource) {
- fd_resource_set_usage(buf->resource, FD_DIRTY_IMAGE);
- so->enabled_mask |= BIT(n);
- } else {
- so->enabled_mask &= ~BIT(n);
- }
- }
- } else {
- mask = (BIT(count) - 1) << start;
+ if (buf->resource) {
+ fd_resource_set_usage(buf->resource, FD_DIRTY_IMAGE);
+ so->enabled_mask |= BIT(n);
+ } else {
+ so->enabled_mask &= ~BIT(n);
+ }
+ }
+ } else {
+ mask = (BIT(count) - 1) << start;
- for (unsigned i = 0; i < count; i++) {
- unsigned n = i + start;
- struct pipe_image_view *img = &so->si[n];
+ for (unsigned i = 0; i < count; i++) {
+ unsigned n = i + start;
+ struct pipe_image_view *img = &so->si[n];
- pipe_resource_reference(&img->resource, NULL);
- }
+ pipe_resource_reference(&img->resource, NULL);
+ }
- so->enabled_mask &= ~mask;
- }
+ so->enabled_mask &= ~mask;
+ }
- for (unsigned i = 0; i < unbind_num_trailing_slots; i++)
- pipe_resource_reference(&so->si[i + start + count].resource, NULL);
+ for (unsigned i = 0; i < unbind_num_trailing_slots; i++)
+ pipe_resource_reference(&so->si[i + start + count].resource, NULL);
- so->enabled_mask &= ~(BITFIELD_MASK(unbind_num_trailing_slots) << (start + count));
+ so->enabled_mask &=
+ ~(BITFIELD_MASK(unbind_num_trailing_slots) << (start + count));
- fd_context_dirty_shader(ctx, shader, FD_DIRTY_SHADER_IMAGE);
+ fd_context_dirty_shader(ctx, shader, FD_DIRTY_SHADER_IMAGE);
}
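/* Standalone model (example slot numbers, u_bit_consecutive() and
 * BITFIELD_MASK() re-implemented inline) of the mask bookkeeping used by
 * the two setters above:
 */
#include <assert.h>
#include <stdint.h>

static uint32_t
bit_consecutive(unsigned start, unsigned count)
{
   return ((count == 32) ? ~0u : ((1u << count) - 1)) << start;
}

int
main(void)
{
   uint32_t enabled = 0xf; /* slots 0-3 bound */
   uint32_t writable = 0;

   /* rebind slots 1-2 with writable_bitmask = 0b10 (slot 2 writable): */
   uint32_t modified = bit_consecutive(1, 2);
   enabled &= ~modified;
   writable &= ~modified;
   writable |= 0x2u << 1;
   enabled |= modified; /* both slots received valid buffers */

   /* unbind one trailing slot starting at slot 3: */
   enabled &= ~(bit_consecutive(0, 1) << 3);

   assert(enabled == 0x7 && writable == 0x4);
   return 0;
}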
static void
fd_set_framebuffer_state(struct pipe_context *pctx,
- const struct pipe_framebuffer_state *framebuffer)
- in_dt
+ const struct pipe_framebuffer_state *framebuffer) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct pipe_framebuffer_state *cso;
+ struct fd_context *ctx = fd_context(pctx);
+ struct pipe_framebuffer_state *cso;
- DBG("%ux%u, %u layers, %u samples",
- framebuffer->width, framebuffer->height,
- framebuffer->layers, framebuffer->samples);
+ DBG("%ux%u, %u layers, %u samples", framebuffer->width, framebuffer->height,
+ framebuffer->layers, framebuffer->samples);
- cso = &ctx->framebuffer;
+ cso = &ctx->framebuffer;
- if (util_framebuffer_state_equal(cso, framebuffer))
- return;
+ if (util_framebuffer_state_equal(cso, framebuffer))
+ return;
- /* Do this *after* checking that the framebuffer state is actually
- * changing. In the fd_blitter_clear() path, we get a pfb update
- * to restore the current pfb state, which should not trigger us
- * to flush (as that can cause the batch to be freed at a point
- * before fd_clear() returns, but after the point where it expects
- * flushes to potentially happen.
- */
- fd_context_switch_from(ctx);
+ /* Do this *after* checking that the framebuffer state is actually
+ * changing. In the fd_blitter_clear() path, we get a pfb update
+ * to restore the current pfb state, which should not trigger us
+ * to flush (as that can cause the batch to be freed at a point
+ * before fd_clear() returns, but after the point where it expects
+    * flushes to potentially happen).
+ */
+ fd_context_switch_from(ctx);
- util_copy_framebuffer_state(cso, framebuffer);
+ util_copy_framebuffer_state(cso, framebuffer);
- cso->samples = util_framebuffer_get_num_samples(cso);
+ cso->samples = util_framebuffer_get_num_samples(cso);
- if (ctx->screen->reorder) {
- struct fd_batch *old_batch = NULL;
+ if (ctx->screen->reorder) {
+ struct fd_batch *old_batch = NULL;
- fd_batch_reference(&old_batch, ctx->batch);
+ fd_batch_reference(&old_batch, ctx->batch);
- if (likely(old_batch))
- fd_batch_finish_queries(old_batch);
+ if (likely(old_batch))
+ fd_batch_finish_queries(old_batch);
- fd_batch_reference(&ctx->batch, NULL);
- fd_context_all_dirty(ctx);
- ctx->update_active_queries = true;
+ fd_batch_reference(&ctx->batch, NULL);
+ fd_context_all_dirty(ctx);
+ ctx->update_active_queries = true;
- if (old_batch && old_batch->blit && !old_batch->back_blit) {
- /* for blits, there is not really much point in hanging on
- * to the uncommitted batch (ie. you probably don't blit
- * multiple times to the same surface), so we might as
- * well go ahead and flush this one:
- */
- fd_batch_flush(old_batch);
- }
+ if (old_batch && old_batch->blit && !old_batch->back_blit) {
+ /* for blits, there is not really much point in hanging on
+ * to the uncommitted batch (ie. you probably don't blit
+ * multiple times to the same surface), so we might as
+ * well go ahead and flush this one:
+ */
+ fd_batch_flush(old_batch);
+ }
- fd_batch_reference(&old_batch, NULL);
- } else if (ctx->batch) {
- DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
- framebuffer->cbufs[0], framebuffer->zsbuf);
- fd_batch_flush(ctx->batch);
- }
+ fd_batch_reference(&old_batch, NULL);
+ } else if (ctx->batch) {
+ DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
+ framebuffer->cbufs[0], framebuffer->zsbuf);
+ fd_batch_flush(ctx->batch);
+ }
- fd_context_dirty(ctx, FD_DIRTY_FRAMEBUFFER);
+ fd_context_dirty(ctx, FD_DIRTY_FRAMEBUFFER);
- ctx->disabled_scissor.minx = 0;
- ctx->disabled_scissor.miny = 0;
- ctx->disabled_scissor.maxx = cso->width;
- ctx->disabled_scissor.maxy = cso->height;
+ ctx->disabled_scissor.minx = 0;
+ ctx->disabled_scissor.miny = 0;
+ ctx->disabled_scissor.maxx = cso->width;
+ ctx->disabled_scissor.maxy = cso->height;
- fd_context_dirty(ctx, FD_DIRTY_SCISSOR);
- update_draw_cost(ctx);
+ fd_context_dirty(ctx, FD_DIRTY_SCISSOR);
+ update_draw_cost(ctx);
}
static void
fd_set_polygon_stipple(struct pipe_context *pctx,
- const struct pipe_poly_stipple *stipple)
- in_dt
+ const struct pipe_poly_stipple *stipple) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->stipple = *stipple;
- fd_context_dirty(ctx, FD_DIRTY_STIPPLE);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->stipple = *stipple;
+ fd_context_dirty(ctx, FD_DIRTY_STIPPLE);
}
static void
-fd_set_scissor_states(struct pipe_context *pctx,
- unsigned start_slot,
- unsigned num_scissors,
- const struct pipe_scissor_state *scissor)
- in_dt
+fd_set_scissor_states(struct pipe_context *pctx, unsigned start_slot,
+ unsigned num_scissors,
+ const struct pipe_scissor_state *scissor) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- ctx->scissor = *scissor;
- fd_context_dirty(ctx, FD_DIRTY_SCISSOR);
+ ctx->scissor = *scissor;
+ fd_context_dirty(ctx, FD_DIRTY_SCISSOR);
}
static void
-fd_set_viewport_states(struct pipe_context *pctx,
- unsigned start_slot,
- unsigned num_viewports,
- const struct pipe_viewport_state *viewport)
- in_dt
+fd_set_viewport_states(struct pipe_context *pctx, unsigned start_slot,
+ unsigned num_viewports,
+ const struct pipe_viewport_state *viewport) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct pipe_scissor_state *scissor = &ctx->viewport_scissor;
- float minx, miny, maxx, maxy;
+ struct fd_context *ctx = fd_context(pctx);
+ struct pipe_scissor_state *scissor = &ctx->viewport_scissor;
+ float minx, miny, maxx, maxy;
- ctx->viewport = *viewport;
+ ctx->viewport = *viewport;
- /* see si_get_scissor_from_viewport(): */
+ /* see si_get_scissor_from_viewport(): */
- /* Convert (-1, -1) and (1, 1) from clip space into window space. */
- minx = -viewport->scale[0] + viewport->translate[0];
- miny = -viewport->scale[1] + viewport->translate[1];
- maxx = viewport->scale[0] + viewport->translate[0];
- maxy = viewport->scale[1] + viewport->translate[1];
+ /* Convert (-1, -1) and (1, 1) from clip space into window space. */
+ minx = -viewport->scale[0] + viewport->translate[0];
+ miny = -viewport->scale[1] + viewport->translate[1];
+ maxx = viewport->scale[0] + viewport->translate[0];
+ maxy = viewport->scale[1] + viewport->translate[1];
- /* Handle inverted viewports. */
- if (minx > maxx) {
- swap(minx, maxx);
- }
- if (miny > maxy) {
- swap(miny, maxy);
- }
+ /* Handle inverted viewports. */
+ if (minx > maxx) {
+ swap(minx, maxx);
+ }
+ if (miny > maxy) {
+ swap(miny, maxy);
+ }
- const float max_dims = ctx->screen->gpu_id >= 400 ? 16384.f : 4096.f;
+ const float max_dims = ctx->screen->gpu_id >= 400 ? 16384.f : 4096.f;
- /* Clamp, convert to integer and round up the max bounds. */
- scissor->minx = CLAMP(minx, 0.f, max_dims);
- scissor->miny = CLAMP(miny, 0.f, max_dims);
- scissor->maxx = CLAMP(ceilf(maxx), 0.f, max_dims);
- scissor->maxy = CLAMP(ceilf(maxy), 0.f, max_dims);
+ /* Clamp, convert to integer and round up the max bounds. */
+ scissor->minx = CLAMP(minx, 0.f, max_dims);
+ scissor->miny = CLAMP(miny, 0.f, max_dims);
+ scissor->maxx = CLAMP(ceilf(maxx), 0.f, max_dims);
+ scissor->maxy = CLAMP(ceilf(maxy), 0.f, max_dims);
- fd_context_dirty(ctx, FD_DIRTY_VIEWPORT);
+ fd_context_dirty(ctx, FD_DIRTY_VIEWPORT);
}
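/* Standalone model of the viewport-to-scissor conversion above: the
 * clip-space corners (-1,-1) and (1,1) map through scale/translate,
 * inverted axes are swapped, and the result is clamped (16384 is the
 * >= a4xx max_dims case; the viewport numbers are examples):
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
   float scale_x = 320.f, translate_x = 320.f; /* a 640px-wide viewport */
   float minx = -scale_x + translate_x;
   float maxx = scale_x + translate_x;
   if (minx > maxx) {
      float t = minx;
      minx = maxx;
      maxx = t;
   }
   const float max_dims = 16384.f;
   unsigned smin = (unsigned)fminf(fmaxf(minx, 0.f), max_dims);
   unsigned smax = (unsigned)fminf(fmaxf(ceilf(maxx), 0.f), max_dims);
   printf("scissor x: [%u, %u]\n", smin, smax); /* [0, 640] */
   return 0;
}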
static void
-fd_set_vertex_buffers(struct pipe_context *pctx,
- unsigned start_slot, unsigned count,
- unsigned unbind_num_trailing_slots,
- bool take_ownership,
- const struct pipe_vertex_buffer *vb)
- in_dt
+fd_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot,
+ unsigned count, unsigned unbind_num_trailing_slots,
+ bool take_ownership,
+ const struct pipe_vertex_buffer *vb) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_vertexbuf_stateobj *so = &ctx->vtx.vertexbuf;
- int i;
-
- /* on a2xx, pitch is encoded in the vtx fetch instruction, so
- * we need to mark VTXSTATE as dirty as well to trigger patching
- * and re-emitting the vtx shader:
- */
- if (ctx->screen->gpu_id < 300) {
- for (i = 0; i < count; i++) {
- bool new_enabled = vb && vb[i].buffer.resource;
- bool old_enabled = so->vb[i].buffer.resource != NULL;
- uint32_t new_stride = vb ? vb[i].stride : 0;
- uint32_t old_stride = so->vb[i].stride;
- if ((new_enabled != old_enabled) || (new_stride != old_stride)) {
- fd_context_dirty(ctx, FD_DIRTY_VTXSTATE);
- break;
- }
- }
- }
-
- util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot,
- count, unbind_num_trailing_slots,
- take_ownership);
- so->count = util_last_bit(so->enabled_mask);
-
- if (!vb)
- return;
-
- fd_context_dirty(ctx, FD_DIRTY_VTXBUF);
-
- for (unsigned i = 0; i < count; i++) {
- assert(!vb[i].is_user_buffer);
- fd_resource_set_usage(vb[i].buffer.resource, FD_DIRTY_VTXBUF);
- }
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_vertexbuf_stateobj *so = &ctx->vtx.vertexbuf;
+ int i;
+
+ /* on a2xx, pitch is encoded in the vtx fetch instruction, so
+ * we need to mark VTXSTATE as dirty as well to trigger patching
+ * and re-emitting the vtx shader:
+ */
+ if (ctx->screen->gpu_id < 300) {
+ for (i = 0; i < count; i++) {
+ bool new_enabled = vb && vb[i].buffer.resource;
+ bool old_enabled = so->vb[i].buffer.resource != NULL;
+ uint32_t new_stride = vb ? vb[i].stride : 0;
+ uint32_t old_stride = so->vb[i].stride;
+ if ((new_enabled != old_enabled) || (new_stride != old_stride)) {
+ fd_context_dirty(ctx, FD_DIRTY_VTXSTATE);
+ break;
+ }
+ }
+ }
+
+ util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot,
+ count, unbind_num_trailing_slots,
+ take_ownership);
+ so->count = util_last_bit(so->enabled_mask);
+
+ if (!vb)
+ return;
+
+ fd_context_dirty(ctx, FD_DIRTY_VTXBUF);
+
+ for (unsigned i = 0; i < count; i++) {
+ assert(!vb[i].is_user_buffer);
+ fd_resource_set_usage(vb[i].buffer.resource, FD_DIRTY_VTXBUF);
+ }
}
static void
-fd_blend_state_bind(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_blend_state_bind(struct pipe_context *pctx, void *hwcso) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct pipe_blend_state *cso = hwcso;
- bool old_is_dual = ctx->blend ?
- ctx->blend->rt[0].blend_enable && util_blend_state_is_dual(ctx->blend, 0) :
- false;
- bool new_is_dual = cso ?
- cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0) :
- false;
- ctx->blend = hwcso;
- fd_context_dirty(ctx, FD_DIRTY_BLEND);
- if (old_is_dual != new_is_dual)
- fd_context_dirty(ctx, FD_DIRTY_BLEND_DUAL);
- update_draw_cost(ctx);
+ struct fd_context *ctx = fd_context(pctx);
+ struct pipe_blend_state *cso = hwcso;
+ bool old_is_dual = ctx->blend ? ctx->blend->rt[0].blend_enable &&
+ util_blend_state_is_dual(ctx->blend, 0)
+ : false;
+ bool new_is_dual =
+ cso ? cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0) : false;
+ ctx->blend = hwcso;
+ fd_context_dirty(ctx, FD_DIRTY_BLEND);
+ if (old_is_dual != new_is_dual)
+ fd_context_dirty(ctx, FD_DIRTY_BLEND_DUAL);
+ update_draw_cost(ctx);
}
static void
-fd_blend_state_delete(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_blend_state_delete(struct pipe_context *pctx, void *hwcso) in_dt
{
- FREE(hwcso);
+ FREE(hwcso);
}
static void
-fd_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct pipe_scissor_state *old_scissor = fd_context_get_scissor(ctx);
- bool discard = ctx->rasterizer && ctx->rasterizer->rasterizer_discard;
-
- ctx->rasterizer = hwcso;
- fd_context_dirty(ctx, FD_DIRTY_RASTERIZER);
-
- if (ctx->rasterizer && ctx->rasterizer->scissor) {
- ctx->current_scissor = &ctx->scissor;
- } else {
- ctx->current_scissor = &ctx->disabled_scissor;
- }
-
- /* if scissor enable bit changed we need to mark scissor
- * state as dirty as well:
- * NOTE: we can do a shallow compare, since we only care
- * if it changed to/from &ctx->disable_scissor
- */
- if (old_scissor != fd_context_get_scissor(ctx))
- fd_context_dirty(ctx, FD_DIRTY_SCISSOR);
-
- if (ctx->rasterizer && (discard != ctx->rasterizer->rasterizer_discard))
- fd_context_dirty(ctx, FD_DIRTY_RASTERIZER_DISCARD);
+ struct fd_context *ctx = fd_context(pctx);
+ struct pipe_scissor_state *old_scissor = fd_context_get_scissor(ctx);
+ bool discard = ctx->rasterizer && ctx->rasterizer->rasterizer_discard;
+
+ ctx->rasterizer = hwcso;
+ fd_context_dirty(ctx, FD_DIRTY_RASTERIZER);
+
+ if (ctx->rasterizer && ctx->rasterizer->scissor) {
+ ctx->current_scissor = &ctx->scissor;
+ } else {
+ ctx->current_scissor = &ctx->disabled_scissor;
+ }
+
+ /* if scissor enable bit changed we need to mark scissor
+ * state as dirty as well:
+ * NOTE: we can do a shallow compare, since we only care
+    * if it changed to/from &ctx->disabled_scissor
+ */
+ if (old_scissor != fd_context_get_scissor(ctx))
+ fd_context_dirty(ctx, FD_DIRTY_SCISSOR);
+
+ if (ctx->rasterizer && (discard != ctx->rasterizer->rasterizer_discard))
+ fd_context_dirty(ctx, FD_DIRTY_RASTERIZER_DISCARD);
}
static void
-fd_rasterizer_state_delete(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_rasterizer_state_delete(struct pipe_context *pctx, void *hwcso) in_dt
{
- FREE(hwcso);
+ FREE(hwcso);
}
static void
-fd_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_zsa_state_bind(struct pipe_context *pctx, void *hwcso) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->zsa = hwcso;
- fd_context_dirty(ctx, FD_DIRTY_ZSA);
- update_draw_cost(ctx);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->zsa = hwcso;
+ fd_context_dirty(ctx, FD_DIRTY_ZSA);
+ update_draw_cost(ctx);
}
static void
-fd_zsa_state_delete(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_zsa_state_delete(struct pipe_context *pctx, void *hwcso) in_dt
{
- FREE(hwcso);
+ FREE(hwcso);
}
static void *
fd_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
- const struct pipe_vertex_element *elements)
+ const struct pipe_vertex_element *elements)
{
- struct fd_vertex_stateobj *so = CALLOC_STRUCT(fd_vertex_stateobj);
+ struct fd_vertex_stateobj *so = CALLOC_STRUCT(fd_vertex_stateobj);
- if (!so)
- return NULL;
+ if (!so)
+ return NULL;
- memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
- so->num_elements = num_elements;
+ memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
+ so->num_elements = num_elements;
- return so;
+ return so;
}
static void
-fd_vertex_state_delete(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_vertex_state_delete(struct pipe_context *pctx, void *hwcso) in_dt
{
- FREE(hwcso);
+ FREE(hwcso);
}
static void
-fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
- in_dt
+fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->vtx.vtx = hwcso;
- fd_context_dirty(ctx, FD_DIRTY_VTXSTATE);
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->vtx.vtx = hwcso;
+ fd_context_dirty(ctx, FD_DIRTY_VTXSTATE);
}
static struct pipe_stream_output_target *
fd_create_stream_output_target(struct pipe_context *pctx,
- struct pipe_resource *prsc, unsigned buffer_offset,
- unsigned buffer_size)
+ struct pipe_resource *prsc,
+ unsigned buffer_offset, unsigned buffer_size)
{
- struct fd_stream_output_target *target;
- struct fd_resource *rsc = fd_resource(prsc);
+ struct fd_stream_output_target *target;
+ struct fd_resource *rsc = fd_resource(prsc);
- target = CALLOC_STRUCT(fd_stream_output_target);
- if (!target)
- return NULL;
+ target = CALLOC_STRUCT(fd_stream_output_target);
+ if (!target)
+ return NULL;
- pipe_reference_init(&target->base.reference, 1);
- pipe_resource_reference(&target->base.buffer, prsc);
+ pipe_reference_init(&target->base.reference, 1);
+ pipe_resource_reference(&target->base.buffer, prsc);
- target->base.context = pctx;
- target->base.buffer_offset = buffer_offset;
- target->base.buffer_size = buffer_size;
+ target->base.context = pctx;
+ target->base.buffer_offset = buffer_offset;
+ target->base.buffer_size = buffer_size;
- target->offset_buf = pipe_buffer_create(pctx->screen,
- PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(uint32_t));
+ target->offset_buf = pipe_buffer_create(
+ pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(uint32_t));
- assert(rsc->b.b.target == PIPE_BUFFER);
- util_range_add(&rsc->b.b, &rsc->valid_buffer_range,
- buffer_offset, buffer_offset + buffer_size);
+ assert(rsc->b.b.target == PIPE_BUFFER);
+ util_range_add(&rsc->b.b, &rsc->valid_buffer_range, buffer_offset,
+ buffer_offset + buffer_size);
- return &target->base;
+ return &target->base;
}
static void
fd_stream_output_target_destroy(struct pipe_context *pctx,
- struct pipe_stream_output_target *target)
+ struct pipe_stream_output_target *target)
{
- struct fd_stream_output_target *cso = fd_stream_output_target(target);
+ struct fd_stream_output_target *cso = fd_stream_output_target(target);
- pipe_resource_reference(&cso->base.buffer, NULL);
- pipe_resource_reference(&cso->offset_buf, NULL);
+ pipe_resource_reference(&cso->base.buffer, NULL);
+ pipe_resource_reference(&cso->offset_buf, NULL);
- FREE(target);
+ FREE(target);
}
static void
-fd_set_stream_output_targets(struct pipe_context *pctx,
- unsigned num_targets, struct pipe_stream_output_target **targets,
- const unsigned *offsets)
- in_dt
+fd_set_stream_output_targets(struct pipe_context *pctx, unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ const unsigned *offsets) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_streamout_stateobj *so = &ctx->streamout;
- unsigned i;
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_streamout_stateobj *so = &ctx->streamout;
+ unsigned i;
- debug_assert(num_targets <= ARRAY_SIZE(so->targets));
+ debug_assert(num_targets <= ARRAY_SIZE(so->targets));
- /* Older targets need sw stats enabled for streamout emulation in VS: */
- if (ctx->screen->gpu_id < 500) {
- if (num_targets && !so->num_targets) {
- ctx->stats_users++;
- } else if (so->num_targets && !num_targets) {
- ctx->stats_users--;
- }
- }
+ /* Older targets need sw stats enabled for streamout emulation in VS: */
+ if (ctx->screen->gpu_id < 500) {
+ if (num_targets && !so->num_targets) {
+ ctx->stats_users++;
+ } else if (so->num_targets && !num_targets) {
+ ctx->stats_users--;
+ }
+ }
- for (i = 0; i < num_targets; i++) {
- boolean changed = targets[i] != so->targets[i];
- boolean reset = (offsets[i] != (unsigned)-1);
+ for (i = 0; i < num_targets; i++) {
+ boolean changed = targets[i] != so->targets[i];
+ boolean reset = (offsets[i] != (unsigned)-1);
- so->reset |= (reset << i);
+ so->reset |= (reset << i);
- if (!changed && !reset)
- continue;
+ if (!changed && !reset)
+ continue;
- /* Note that all SO targets will be reset at once at a
- * BeginTransformFeedback().
- */
- if (reset) {
- so->offsets[i] = offsets[i];
- ctx->streamout.verts_written = 0;
- }
+ /* Note that all SO targets will be reset at once at a
+ * BeginTransformFeedback().
+ */
+ if (reset) {
+ so->offsets[i] = offsets[i];
+ ctx->streamout.verts_written = 0;
+ }
- pipe_so_target_reference(&so->targets[i], targets[i]);
- }
+ pipe_so_target_reference(&so->targets[i], targets[i]);
+ }
- for (; i < so->num_targets; i++) {
- pipe_so_target_reference(&so->targets[i], NULL);
- }
+ for (; i < so->num_targets; i++) {
+ pipe_so_target_reference(&so->targets[i], NULL);
+ }
- so->num_targets = num_targets;
+ so->num_targets = num_targets;
- fd_context_dirty(ctx, FD_DIRTY_STREAMOUT);
+ fd_context_dirty(ctx, FD_DIRTY_STREAMOUT);
}
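/* Sketch of the reset bookkeeping above: gallium passes offsets[i] == -1
 * to mean "append at the current offset"; any other value rebases the
 * stream and sets the corresponding bit in the per-target reset mask
 * (the offsets below are illustrative):
 */
#include <assert.h>
#include <stdint.h>

static uint32_t
so_reset_bits(const unsigned *offsets, unsigned num_targets)
{
   uint32_t reset = 0;
   for (unsigned i = 0; i < num_targets; i++)
      if (offsets[i] != (unsigned)-1)
         reset |= 1u << i;
   return reset;
}

int
main(void)
{
   const unsigned offsets[] = {0, (unsigned)-1};
   assert(so_reset_bits(offsets, 2) == 0x1); /* only target 0 restarts */
   return 0;
}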
static void
-fd_bind_compute_state(struct pipe_context *pctx, void *state)
- in_dt
+fd_bind_compute_state(struct pipe_context *pctx, void *state) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- ctx->compute = state;
- /* NOTE: Don't mark FD_DIRTY_PROG for compute specific state */
- ctx->dirty_shader[PIPE_SHADER_COMPUTE] |= FD_DIRTY_SHADER_PROG;
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->compute = state;
+ /* NOTE: Don't mark FD_DIRTY_PROG for compute specific state */
+ ctx->dirty_shader[PIPE_SHADER_COMPUTE] |= FD_DIRTY_SHADER_PROG;
}
static void
-fd_set_compute_resources(struct pipe_context *pctx,
- unsigned start, unsigned count, struct pipe_surface **prscs)
- in_dt
+fd_set_compute_resources(struct pipe_context *pctx, unsigned start,
+ unsigned count, struct pipe_surface **prscs) in_dt
{
- // TODO
+ // TODO
}
/* used by clover to bind global objects, returning the bo address
* via handles[n]
*/
static void
-fd_set_global_binding(struct pipe_context *pctx,
- unsigned first, unsigned count, struct pipe_resource **prscs,
- uint32_t **handles)
- in_dt
+fd_set_global_binding(struct pipe_context *pctx, unsigned first, unsigned count,
+ struct pipe_resource **prscs, uint32_t **handles) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_global_bindings_stateobj *so = &ctx->global_bindings;
- unsigned mask = 0;
-
- if (prscs) {
- for (unsigned i = 0; i < count; i++) {
- unsigned n = i + first;
-
- mask |= BIT(n);
-
- pipe_resource_reference(&so->buf[n], prscs[i]);
-
- if (so->buf[n]) {
- struct fd_resource *rsc = fd_resource(so->buf[n]);
- uint64_t iova = fd_bo_get_iova(rsc->bo);
- // TODO need to scream if iova > 32b or fix gallium API..
- *handles[i] += iova;
- }
-
- if (prscs[i])
- so->enabled_mask |= BIT(n);
- else
- so->enabled_mask &= ~BIT(n);
- }
- } else {
- mask = (BIT(count) - 1) << first;
-
- for (unsigned i = 0; i < count; i++) {
- unsigned n = i + first;
- pipe_resource_reference(&so->buf[n], NULL);
- }
-
- so->enabled_mask &= ~mask;
- }
-
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_global_bindings_stateobj *so = &ctx->global_bindings;
+ unsigned mask = 0;
+
+ if (prscs) {
+ for (unsigned i = 0; i < count; i++) {
+ unsigned n = i + first;
+
+ mask |= BIT(n);
+
+ pipe_resource_reference(&so->buf[n], prscs[i]);
+
+ if (so->buf[n]) {
+ struct fd_resource *rsc = fd_resource(so->buf[n]);
+ uint64_t iova = fd_bo_get_iova(rsc->bo);
+ // TODO need to scream if iova > 32b or fix gallium API..
+ *handles[i] += iova;
+ }
+
+ if (prscs[i])
+ so->enabled_mask |= BIT(n);
+ else
+ so->enabled_mask &= ~BIT(n);
+ }
+ } else {
+ mask = (BIT(count) - 1) << first;
+
+ for (unsigned i = 0; i < count; i++) {
+ unsigned n = i + first;
+ pipe_resource_reference(&so->buf[n], NULL);
+ }
+
+ so->enabled_mask &= ~mask;
+ }
}
void
fd_state_init(struct pipe_context *pctx)
{
- pctx->set_blend_color = fd_set_blend_color;
- pctx->set_stencil_ref = fd_set_stencil_ref;
- pctx->set_clip_state = fd_set_clip_state;
- pctx->set_sample_mask = fd_set_sample_mask;
- pctx->set_min_samples = fd_set_min_samples;
- pctx->set_constant_buffer = fd_set_constant_buffer;
- pctx->set_shader_buffers = fd_set_shader_buffers;
- pctx->set_shader_images = fd_set_shader_images;
- pctx->set_framebuffer_state = fd_set_framebuffer_state;
- pctx->set_polygon_stipple = fd_set_polygon_stipple;
- pctx->set_scissor_states = fd_set_scissor_states;
- pctx->set_viewport_states = fd_set_viewport_states;
-
- pctx->set_vertex_buffers = fd_set_vertex_buffers;
-
- pctx->bind_blend_state = fd_blend_state_bind;
- pctx->delete_blend_state = fd_blend_state_delete;
-
- pctx->bind_rasterizer_state = fd_rasterizer_state_bind;
- pctx->delete_rasterizer_state = fd_rasterizer_state_delete;
-
- pctx->bind_depth_stencil_alpha_state = fd_zsa_state_bind;
- pctx->delete_depth_stencil_alpha_state = fd_zsa_state_delete;
-
- if (!pctx->create_vertex_elements_state)
- pctx->create_vertex_elements_state = fd_vertex_state_create;
- pctx->delete_vertex_elements_state = fd_vertex_state_delete;
- pctx->bind_vertex_elements_state = fd_vertex_state_bind;
-
- pctx->create_stream_output_target = fd_create_stream_output_target;
- pctx->stream_output_target_destroy = fd_stream_output_target_destroy;
- pctx->set_stream_output_targets = fd_set_stream_output_targets;
-
- if (has_compute(fd_screen(pctx->screen))) {
- pctx->bind_compute_state = fd_bind_compute_state;
- pctx->set_compute_resources = fd_set_compute_resources;
- pctx->set_global_binding = fd_set_global_binding;
- }
+ pctx->set_blend_color = fd_set_blend_color;
+ pctx->set_stencil_ref = fd_set_stencil_ref;
+ pctx->set_clip_state = fd_set_clip_state;
+ pctx->set_sample_mask = fd_set_sample_mask;
+ pctx->set_min_samples = fd_set_min_samples;
+ pctx->set_constant_buffer = fd_set_constant_buffer;
+ pctx->set_shader_buffers = fd_set_shader_buffers;
+ pctx->set_shader_images = fd_set_shader_images;
+ pctx->set_framebuffer_state = fd_set_framebuffer_state;
+ pctx->set_polygon_stipple = fd_set_polygon_stipple;
+ pctx->set_scissor_states = fd_set_scissor_states;
+ pctx->set_viewport_states = fd_set_viewport_states;
+
+ pctx->set_vertex_buffers = fd_set_vertex_buffers;
+
+ pctx->bind_blend_state = fd_blend_state_bind;
+ pctx->delete_blend_state = fd_blend_state_delete;
+
+ pctx->bind_rasterizer_state = fd_rasterizer_state_bind;
+ pctx->delete_rasterizer_state = fd_rasterizer_state_delete;
+
+ pctx->bind_depth_stencil_alpha_state = fd_zsa_state_bind;
+ pctx->delete_depth_stencil_alpha_state = fd_zsa_state_delete;
+
+ if (!pctx->create_vertex_elements_state)
+ pctx->create_vertex_elements_state = fd_vertex_state_create;
+ pctx->delete_vertex_elements_state = fd_vertex_state_delete;
+ pctx->bind_vertex_elements_state = fd_vertex_state_bind;
+
+ pctx->create_stream_output_target = fd_create_stream_output_target;
+ pctx->stream_output_target_destroy = fd_stream_output_target_destroy;
+ pctx->set_stream_output_targets = fd_set_stream_output_targets;
+
+ if (has_compute(fd_screen(pctx->screen))) {
+ pctx->bind_compute_state = fd_bind_compute_state;
+ pctx->set_compute_resources = fd_set_compute_resources;
+ pctx->set_global_binding = fd_set_global_binding;
+ }
}
#include "pipe/p_context.h"
#include "freedreno_context.h"
-static inline bool fd_depth_enabled(struct fd_context *ctx)
- assert_dt
+static inline bool
+fd_depth_enabled(struct fd_context *ctx) assert_dt
{
- return ctx->zsa && ctx->zsa->depth_enabled;
+ return ctx->zsa && ctx->zsa->depth_enabled;
}
-static inline bool fd_depth_write_enabled(struct fd_context *ctx)
- assert_dt
+static inline bool
+fd_depth_write_enabled(struct fd_context *ctx) assert_dt
{
- return ctx->zsa && ctx->zsa->depth_writemask;
+ return ctx->zsa && ctx->zsa->depth_writemask;
}
-static inline bool fd_stencil_enabled(struct fd_context *ctx)
- assert_dt
+static inline bool
+fd_stencil_enabled(struct fd_context *ctx) assert_dt
{
- return ctx->zsa && ctx->zsa->stencil[0].enabled;
+ return ctx->zsa && ctx->zsa->stencil[0].enabled;
}
-static inline bool fd_blend_enabled(struct fd_context *ctx, unsigned n)
- assert_dt
+static inline bool
+fd_blend_enabled(struct fd_context *ctx, unsigned n) assert_dt
{
- return ctx->blend && ctx->blend->rt[n].blend_enable;
+ return ctx->blend && ctx->blend->rt[n].blend_enable;
}
-static inline bool fd_depth_clamp_enabled(struct fd_context *ctx)
- assert_dt
+static inline bool
+fd_depth_clamp_enabled(struct fd_context *ctx) assert_dt
{
- return !(ctx->rasterizer->depth_clip_near && ctx->rasterizer->depth_clip_far);
+ return !(ctx->rasterizer->depth_clip_near &&
+ ctx->rasterizer->depth_clip_far);
}
void fd_set_shader_images(struct pipe_context *pctx,
- enum pipe_shader_type shader,
- unsigned start, unsigned count,
- unsigned unbind_num_trailing_slots,
- const struct pipe_image_view *images);
+ enum pipe_shader_type shader, unsigned start,
+ unsigned count, unsigned unbind_num_trailing_slots,
+ const struct pipe_image_view *images);
void fd_state_init(struct pipe_context *pctx);
#include "freedreno_resource.h"
#include "freedreno_util.h"
-#include "util/u_memory.h"
#include "util/u_inlines.h"
+#include "util/u_memory.h"
struct pipe_surface *
-fd_create_surface(struct pipe_context *pctx,
- struct pipe_resource *ptex,
- const struct pipe_surface *surf_tmpl)
+fd_create_surface(struct pipe_context *pctx, struct pipe_resource *ptex,
+ const struct pipe_surface *surf_tmpl)
{
- struct fd_surface* surface = CALLOC_STRUCT(fd_surface);
-
- if (!surface)
- return NULL;
+ struct fd_surface *surface = CALLOC_STRUCT(fd_surface);
+ if (!surface)
+ return NULL;
- struct pipe_surface *psurf = &surface->base;
- unsigned level = surf_tmpl->u.tex.level;
+ struct pipe_surface *psurf = &surface->base;
+ unsigned level = surf_tmpl->u.tex.level;
- pipe_reference_init(&psurf->reference, 1);
- pipe_resource_reference(&psurf->texture, ptex);
+ pipe_reference_init(&psurf->reference, 1);
+ pipe_resource_reference(&psurf->texture, ptex);
- psurf->context = pctx;
- psurf->format = surf_tmpl->format;
- psurf->width = u_minify(ptex->width0, level);
- psurf->height = u_minify(ptex->height0, level);
- psurf->nr_samples = surf_tmpl->nr_samples;
+ psurf->context = pctx;
+ psurf->format = surf_tmpl->format;
+ psurf->width = u_minify(ptex->width0, level);
+ psurf->height = u_minify(ptex->height0, level);
+ psurf->nr_samples = surf_tmpl->nr_samples;
- if (ptex->target == PIPE_BUFFER) {
- psurf->u.buf.first_element = surf_tmpl->u.buf.first_element;
- psurf->u.buf.last_element = surf_tmpl->u.buf.last_element;
- } else {
- psurf->u.tex.level = level;
- psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
- psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
- }
+ if (ptex->target == PIPE_BUFFER) {
+ psurf->u.buf.first_element = surf_tmpl->u.buf.first_element;
+ psurf->u.buf.last_element = surf_tmpl->u.buf.last_element;
+ } else {
+ psurf->u.tex.level = level;
+ psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+ }
- return &surface->base;
+ return &surface->base;
}
void
fd_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
{
- pipe_resource_reference(&psurf->texture, NULL);
- FREE(psurf);
+ pipe_resource_reference(&psurf->texture, NULL);
+ FREE(psurf);
}
#include "pipe/p_state.h"
struct fd_surface {
- struct pipe_surface base;
+ struct pipe_surface base;
};
static inline struct fd_surface *
fd_surface(struct pipe_surface *psurf)
{
- return (struct fd_surface *)psurf;
+ return (struct fd_surface *)psurf;
}
-struct pipe_surface* fd_create_surface(struct pipe_context *pctx,
- struct pipe_resource *ptex,
- const struct pipe_surface *surf_tmpl);
+struct pipe_surface *fd_create_surface(struct pipe_context *pctx,
+ struct pipe_resource *ptex,
+ const struct pipe_surface *surf_tmpl);
void fd_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf);
#endif /* FREEDRENO_SURFACE_H_ */
#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
-#include "freedreno_texture.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"
+#include "freedreno_texture.h"
#include "freedreno_util.h"
static void
fd_sampler_state_delete(struct pipe_context *pctx, void *hwcso)
{
- FREE(hwcso);
+ FREE(hwcso);
}
static void
fd_sampler_view_destroy(struct pipe_context *pctx,
- struct pipe_sampler_view *view)
+ struct pipe_sampler_view *view)
{
- pipe_resource_reference(&view->texture, NULL);
- FREE(view);
+ pipe_resource_reference(&view->texture, NULL);
+ FREE(view);
}
-static void bind_sampler_states(struct fd_texture_stateobj *tex,
- unsigned start, unsigned nr, void **hwcso)
+static void
+bind_sampler_states(struct fd_texture_stateobj *tex, unsigned start,
+ unsigned nr, void **hwcso)
{
- unsigned i;
-
- for (i = 0; i < nr; i++) {
- unsigned p = i + start;
- tex->samplers[p] = hwcso[i];
- if (tex->samplers[p])
- tex->valid_samplers |= (1 << p);
- else
- tex->valid_samplers &= ~(1 << p);
- }
-
- tex->num_samplers = util_last_bit(tex->valid_samplers);
+ unsigned i;
+
+ for (i = 0; i < nr; i++) {
+ unsigned p = i + start;
+ tex->samplers[p] = hwcso[i];
+ if (tex->samplers[p])
+ tex->valid_samplers |= (1 << p);
+ else
+ tex->valid_samplers &= ~(1 << p);
+ }
+
+ tex->num_samplers = util_last_bit(tex->valid_samplers);
}
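/* Sketch of the count update above: util_last_bit() returns one plus the
 * index of the highest set bit (0 for an empty mask), so num_samplers
 * tracks the highest bound slot, not the number of bound slots. A
 * standalone equivalent:
 */
#include <assert.h>

static unsigned
last_bit(unsigned mask)
{
   unsigned n = 0;
   while (mask) {
      n++;
      mask >>= 1;
   }
   return n;
}

int
main(void)
{
   assert(last_bit(0x00) == 0);
   assert(last_bit(0x01) == 1);
   assert(last_bit(0x12) == 5); /* slots 1 and 4 bound -> num_samplers 5 */
   return 0;
}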
-static void set_sampler_views(struct fd_texture_stateobj *tex,
- unsigned start, unsigned nr,
- unsigned unbind_num_trailing_slots,
- struct pipe_sampler_view **views)
+static void
+set_sampler_views(struct fd_texture_stateobj *tex, unsigned start, unsigned nr,
+ unsigned unbind_num_trailing_slots,
+ struct pipe_sampler_view **views)
{
- unsigned i;
- unsigned samplers = 0;
-
- for (i = 0; i < nr; i++) {
- struct pipe_sampler_view *view = views ? views[i] : NULL;
- unsigned p = i + start;
- pipe_sampler_view_reference(&tex->textures[p], view);
- if (tex->textures[p]) {
- fd_resource_set_usage(tex->textures[p]->texture, FD_DIRTY_TEX);
- tex->valid_textures |= (1 << p);
- } else {
- tex->valid_textures &= ~(1 << p);
- }
- }
- for (; i < nr + unbind_num_trailing_slots; i++) {
- unsigned p = i + start;
- pipe_sampler_view_reference(&tex->textures[p], NULL);
- tex->valid_textures &= ~(1 << p);
- }
-
- tex->num_textures = util_last_bit(tex->valid_textures);
-
- for (i = 0; i < tex->num_textures; i++) {
- uint nr_samples = fd_resource_nr_samples(tex->textures[i]->texture);
- samplers |= (nr_samples >> 1) << (i * 2);
- }
-
- tex->samples = samplers;
+ unsigned i;
+ unsigned samplers = 0;
+
+ for (i = 0; i < nr; i++) {
+ struct pipe_sampler_view *view = views ? views[i] : NULL;
+ unsigned p = i + start;
+ pipe_sampler_view_reference(&tex->textures[p], view);
+ if (tex->textures[p]) {
+ fd_resource_set_usage(tex->textures[p]->texture, FD_DIRTY_TEX);
+ tex->valid_textures |= (1 << p);
+ } else {
+ tex->valid_textures &= ~(1 << p);
+ }
+ }
+ for (; i < nr + unbind_num_trailing_slots; i++) {
+ unsigned p = i + start;
+ pipe_sampler_view_reference(&tex->textures[p], NULL);
+ tex->valid_textures &= ~(1 << p);
+ }
+
+ tex->num_textures = util_last_bit(tex->valid_textures);
+
+ for (i = 0; i < tex->num_textures; i++) {
+ uint nr_samples = fd_resource_nr_samples(tex->textures[i]->texture);
+ samplers |= (nr_samples >> 1) << (i * 2);
+ }
+
+ tex->samples = samplers;
}
void
-fd_sampler_states_bind(struct pipe_context *pctx,
- enum pipe_shader_type shader, unsigned start,
- unsigned nr, void **hwcso)
- in_dt
+fd_sampler_states_bind(struct pipe_context *pctx, enum pipe_shader_type shader,
+ unsigned start, unsigned nr, void **hwcso) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- bind_sampler_states(&ctx->tex[shader], start, nr, hwcso);
- fd_context_dirty_shader(ctx, shader, FD_DIRTY_SHADER_TEX);
+ bind_sampler_states(&ctx->tex[shader], start, nr, hwcso);
+ fd_context_dirty_shader(ctx, shader, FD_DIRTY_SHADER_TEX);
}
void
fd_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
- unsigned start, unsigned nr, unsigned unbind_num_trailing_slots,
- struct pipe_sampler_view **views)
- in_dt
+ unsigned start, unsigned nr,
+ unsigned unbind_num_trailing_slots,
+ struct pipe_sampler_view **views) in_dt
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- set_sampler_views(&ctx->tex[shader], start, nr, unbind_num_trailing_slots, views);
- fd_context_dirty_shader(ctx, shader, FD_DIRTY_SHADER_TEX);
+ set_sampler_views(&ctx->tex[shader], start, nr, unbind_num_trailing_slots,
+ views);
+ fd_context_dirty_shader(ctx, shader, FD_DIRTY_SHADER_TEX);
}
void
fd_texture_init(struct pipe_context *pctx)
{
- if (!pctx->delete_sampler_state)
- pctx->delete_sampler_state = fd_sampler_state_delete;
- if (!pctx->sampler_view_destroy)
- pctx->sampler_view_destroy = fd_sampler_view_destroy;
+ if (!pctx->delete_sampler_state)
+ pctx->delete_sampler_state = fd_sampler_state_delete;
+ if (!pctx->sampler_view_destroy)
+ pctx->sampler_view_destroy = fd_sampler_view_destroy;
}
/* helper for setting up border-color buffer for a3xx/a4xx: */
void
fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
- unsigned offset)
+ unsigned offset)
{
- unsigned i, j;
-
- for (i = 0; i < tex->num_samplers; i++) {
- struct pipe_sampler_state *sampler = tex->samplers[i];
- uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr +
- (BORDERCOLOR_SIZE * offset) +
- (BORDERCOLOR_SIZE * i));
- uint32_t *bcolor32 = (uint32_t *)&bcolor[16];
-
- if (!sampler)
- continue;
-
- /*
- * XXX HACK ALERT XXX
- *
- * The border colors need to be swizzled in a particular
- * format-dependent order. Even though samplers don't know about
- * formats, we can assume that with a GL state tracker, there's a
- * 1:1 correspondence between sampler and texture. Take advantage
- * of that knowledge.
- */
- if (i < tex->num_textures && tex->textures[i]) {
- const struct util_format_description *desc =
- util_format_description(tex->textures[i]->format);
- for (j = 0; j < 4; j++) {
- if (desc->swizzle[j] >= 4)
- continue;
-
- const struct util_format_channel_description *chan =
- &desc->channel[desc->swizzle[j]];
- if (chan->pure_integer) {
- bcolor32[desc->swizzle[j] + 4] = sampler->border_color.i[j];
- bcolor[desc->swizzle[j] + 8] = sampler->border_color.i[j];
- } else {
- bcolor32[desc->swizzle[j]] = fui(sampler->border_color.f[j]);
- bcolor[desc->swizzle[j]] =
- _mesa_float_to_half(sampler->border_color.f[j]);
- }
- }
- }
- }
+ unsigned i, j;
+
+ for (i = 0; i < tex->num_samplers; i++) {
+ struct pipe_sampler_state *sampler = tex->samplers[i];
+ uint16_t *bcolor =
+ (uint16_t *)((uint8_t *)ptr + (BORDERCOLOR_SIZE * offset) +
+ (BORDERCOLOR_SIZE * i));
+ uint32_t *bcolor32 = (uint32_t *)&bcolor[16];
+
+ if (!sampler)
+ continue;
+
+ /*
+ * XXX HACK ALERT XXX
+ *
+ * The border colors need to be swizzled in a particular
+ * format-dependent order. Even though samplers don't know about
+ * formats, we can assume that with a GL state tracker, there's a
+ * 1:1 correspondence between sampler and texture. Take advantage
+ * of that knowledge.
+ */
+ if (i < tex->num_textures && tex->textures[i]) {
+ const struct util_format_description *desc =
+ util_format_description(tex->textures[i]->format);
+ for (j = 0; j < 4; j++) {
+ if (desc->swizzle[j] >= 4)
+ continue;
+
+ const struct util_format_channel_description *chan =
+ &desc->channel[desc->swizzle[j]];
+ if (chan->pure_integer) {
+ bcolor32[desc->swizzle[j] + 4] = sampler->border_color.i[j];
+ bcolor[desc->swizzle[j] + 8] = sampler->border_color.i[j];
+ } else {
+ bcolor32[desc->swizzle[j]] = fui(sampler->border_color.f[j]);
+ bcolor[desc->swizzle[j]] =
+ _mesa_float_to_half(sampler->border_color.f[j]);
+ }
+ }
+ }
+ }
}
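/* Worked model of the swizzle scatter above, using a hypothetical BGRA
 * view where desc->swizzle would be {2, 1, 0, 3}: border component j is
 * written to storage slot swizzle[j], so red (j = 0) lands in slot 2.
 * Only the float half of the entry is modeled here:
 */
#include <stdio.h>

int
main(void)
{
   const unsigned char swizzle[4] = {2, 1, 0, 3};
   const float border[4] = {1.f, 0.f, 0.f, 0.5f}; /* red, half alpha */
   float bcolor[4] = {0};

   for (int j = 0; j < 4; j++)
      if (swizzle[j] < 4)
         bcolor[swizzle[j]] = border[j];

   for (int j = 0; j < 4; j++)
      printf("slot %d = %.2f\n", j, bcolor[j]); /* slot 2 = 1.00 */
   return 0;
}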
#include "pipe/p_context.h"
void fd_sampler_states_bind(struct pipe_context *pctx,
- enum pipe_shader_type shader, unsigned start,
- unsigned nr, void **hwcso);
+ enum pipe_shader_type shader, unsigned start,
+ unsigned nr, void **hwcso);
void fd_set_sampler_views(struct pipe_context *pctx,
- enum pipe_shader_type shader,
- unsigned start, unsigned nr,
- unsigned unbind_num_trailing_slots,
- struct pipe_sampler_view **views);
+ enum pipe_shader_type shader, unsigned start,
+ unsigned nr, unsigned unbind_num_trailing_slots,
+ struct pipe_sampler_view **views);
void fd_texture_init(struct pipe_context *pctx);
#define BORDERCOLOR_SIZE 0x40
void fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
- unsigned offset);
+ unsigned offset);
#endif /* FREEDRENO_TEXTURE_H_ */
enum adreno_rb_depth_format
fd_pipe2depth(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_Z16_UNORM:
- return DEPTHX_16;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- case PIPE_FORMAT_X8Z24_UNORM:
- case PIPE_FORMAT_S8_UINT_Z24_UNORM:
- return DEPTHX_24_8;
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- return DEPTHX_32;
- default:
- return ~0;
- }
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ return DEPTHX_16;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ return DEPTHX_24_8;
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return DEPTHX_32;
+ default:
+ return ~0;
+ }
}
enum pc_di_index_size
fd_pipe2index(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_I8_UINT:
- return INDEX_SIZE_8_BIT;
- case PIPE_FORMAT_I16_UINT:
- return INDEX_SIZE_16_BIT;
- case PIPE_FORMAT_I32_UINT:
- return INDEX_SIZE_32_BIT;
- default:
- return ~0;
- }
+ switch (format) {
+ case PIPE_FORMAT_I8_UINT:
+ return INDEX_SIZE_8_BIT;
+ case PIPE_FORMAT_I16_UINT:
+ return INDEX_SIZE_16_BIT;
+ case PIPE_FORMAT_I32_UINT:
+ return INDEX_SIZE_32_BIT;
+ default:
+ return ~0;
+ }
}
/* we need to special case a bit the depth/stencil restore, because we are
 * reusing the color restore path for depth/stencil, so we need a color
 * format with the same per-pixel size:
 */
enum pipe_format
fd_gmem_restore_format(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- return PIPE_FORMAT_R8G8B8A8_UNORM;
- case PIPE_FORMAT_Z16_UNORM:
- return PIPE_FORMAT_R8G8_UNORM;
- case PIPE_FORMAT_S8_UINT:
- return PIPE_FORMAT_R8_UNORM;
- default:
- return format;
- }
+ switch (format) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ return PIPE_FORMAT_R8G8B8A8_UNORM;
+ case PIPE_FORMAT_Z16_UNORM:
+ return PIPE_FORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_S8_UINT:
+ return PIPE_FORMAT_R8_UNORM;
+ default:
+ return format;
+ }
}
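/* The substitutions above pair each depth/stencil format with a color
 * format of equal per-pixel size, so the restore blit can move the bits
 * verbatim: the Z24 variants and R8G8B8A8 are both 32bpp, Z16 and R8G8
 * are both 16bpp, and S8 and R8 are both 8bpp.
 */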
enum adreno_rb_blend_factor
fd_blend_factor(unsigned factor)
{
- switch (factor) {
- case PIPE_BLENDFACTOR_ONE:
- return FACTOR_ONE;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- return FACTOR_SRC_COLOR;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- return FACTOR_SRC_ALPHA;
- case PIPE_BLENDFACTOR_DST_ALPHA:
- return FACTOR_DST_ALPHA;
- case PIPE_BLENDFACTOR_DST_COLOR:
- return FACTOR_DST_COLOR;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- return FACTOR_SRC_ALPHA_SATURATE;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- return FACTOR_CONSTANT_COLOR;
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- return FACTOR_CONSTANT_ALPHA;
- case PIPE_BLENDFACTOR_ZERO:
- case 0:
- return FACTOR_ZERO;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- return FACTOR_ONE_MINUS_SRC_COLOR;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- return FACTOR_ONE_MINUS_SRC_ALPHA;
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- return FACTOR_ONE_MINUS_DST_ALPHA;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- return FACTOR_ONE_MINUS_DST_COLOR;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- return FACTOR_ONE_MINUS_CONSTANT_COLOR;
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- return FACTOR_ONE_MINUS_CONSTANT_ALPHA;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- return FACTOR_ONE_MINUS_SRC1_COLOR;
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- return FACTOR_ONE_MINUS_SRC1_ALPHA;
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- return FACTOR_SRC1_COLOR;
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- return FACTOR_SRC1_ALPHA;
- default:
- DBG("invalid blend factor: %x", factor);
- return 0;
- }
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ return FACTOR_ONE;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return FACTOR_SRC_COLOR;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return FACTOR_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return FACTOR_DST_ALPHA;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return FACTOR_DST_COLOR;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return FACTOR_SRC_ALPHA_SATURATE;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return FACTOR_CONSTANT_COLOR;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return FACTOR_CONSTANT_ALPHA;
+ case PIPE_BLENDFACTOR_ZERO:
+ case 0:
+ return FACTOR_ZERO;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return FACTOR_ONE_MINUS_SRC_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return FACTOR_ONE_MINUS_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return FACTOR_ONE_MINUS_DST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return FACTOR_ONE_MINUS_DST_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return FACTOR_ONE_MINUS_CONSTANT_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return FACTOR_ONE_MINUS_CONSTANT_ALPHA;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ return FACTOR_ONE_MINUS_SRC1_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ return FACTOR_ONE_MINUS_SRC1_ALPHA;
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ return FACTOR_SRC1_COLOR;
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ return FACTOR_SRC1_ALPHA;
+ default:
+ DBG("invalid blend factor: %x", factor);
+ return 0;
+ }
}
enum adreno_pa_su_sc_draw
fd_polygon_mode(unsigned mode)
{
- switch (mode) {
- case PIPE_POLYGON_MODE_POINT:
- return PC_DRAW_POINTS;
- case PIPE_POLYGON_MODE_LINE:
- return PC_DRAW_LINES;
- case PIPE_POLYGON_MODE_FILL:
- return PC_DRAW_TRIANGLES;
- default:
- DBG("invalid polygon mode: %u", mode);
- return 0;
- }
+ switch (mode) {
+ case PIPE_POLYGON_MODE_POINT:
+ return PC_DRAW_POINTS;
+ case PIPE_POLYGON_MODE_LINE:
+ return PC_DRAW_LINES;
+ case PIPE_POLYGON_MODE_FILL:
+ return PC_DRAW_TRIANGLES;
+ default:
+ DBG("invalid polygon mode: %u", mode);
+ return 0;
+ }
}
enum adreno_stencil_op
fd_stencil_op(unsigned op)
{
- switch (op) {
- case PIPE_STENCIL_OP_KEEP:
- return STENCIL_KEEP;
- case PIPE_STENCIL_OP_ZERO:
- return STENCIL_ZERO;
- case PIPE_STENCIL_OP_REPLACE:
- return STENCIL_REPLACE;
- case PIPE_STENCIL_OP_INCR:
- return STENCIL_INCR_CLAMP;
- case PIPE_STENCIL_OP_DECR:
- return STENCIL_DECR_CLAMP;
- case PIPE_STENCIL_OP_INCR_WRAP:
- return STENCIL_INCR_WRAP;
- case PIPE_STENCIL_OP_DECR_WRAP:
- return STENCIL_DECR_WRAP;
- case PIPE_STENCIL_OP_INVERT:
- return STENCIL_INVERT;
- default:
- DBG("invalid stencil op: %u", op);
- return 0;
- }
+ switch (op) {
+ case PIPE_STENCIL_OP_KEEP:
+ return STENCIL_KEEP;
+ case PIPE_STENCIL_OP_ZERO:
+ return STENCIL_ZERO;
+ case PIPE_STENCIL_OP_REPLACE:
+ return STENCIL_REPLACE;
+ case PIPE_STENCIL_OP_INCR:
+ return STENCIL_INCR_CLAMP;
+ case PIPE_STENCIL_OP_DECR:
+ return STENCIL_DECR_CLAMP;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ return STENCIL_INCR_WRAP;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ return STENCIL_DECR_WRAP;
+ case PIPE_STENCIL_OP_INVERT:
+ return STENCIL_INVERT;
+ default:
+ DBG("invalid stencil op: %u", op);
+ return 0;
+ }
}
#include "pipe/p_format.h"
#include "pipe/p_state.h"
#include "util/compiler.h"
+#include "util/half_float.h"
#include "util/log.h"
#include "util/u_debug.h"
-#include "util/u_math.h"
-#include "util/half_float.h"
#include "util/u_dynarray.h"
+#include "util/u_math.h"
#include "util/u_pack_color.h"
-#include "disasm.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
+#include "disasm.h"
enum adreno_rb_depth_format fd_pipe2depth(enum pipe_format format);
enum pc_di_index_size fd_pipe2index(enum pipe_format format);
extern int fd_mesa_debug;
extern bool fd_binning_enabled;
-#define FD_DBG(category) unlikely(fd_mesa_debug & FD_DBG_##category)
+#define FD_DBG(category) unlikely(fd_mesa_debug & FD_DBG_##category)
-#define DBG(fmt, ...) \
- do { if (FD_DBG(MSGS)) \
- mesa_logd("%s:%d: "fmt, \
- __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
+#define DBG(fmt, ...) \
+ do { \
+ if (FD_DBG(MSGS)) \
+ mesa_logd("%s:%d: " fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+ } while (0)
-#define perf_debug_ctx(ctx, ...) do { \
- if (FD_DBG(PERF)) \
- mesa_logw(__VA_ARGS__); \
- struct fd_context *__c = (ctx); \
- if (__c) \
- pipe_debug_message(&__c->debug, PERF_INFO, __VA_ARGS__); \
- } while(0)
+#define perf_debug_ctx(ctx, ...) \
+ do { \
+ if (FD_DBG(PERF)) \
+ mesa_logw(__VA_ARGS__); \
+ struct fd_context *__c = (ctx); \
+ if (__c) \
+ pipe_debug_message(&__c->debug, PERF_INFO, __VA_ARGS__); \
+ } while (0)
#define perf_debug(...) perf_debug_ctx(NULL, __VA_ARGS__)
-#define perf_time_ctx(ctx, limit_ns, fmt, ...) for( \
- struct __perf_time_state __s = { \
- .t = -__perf_get_time(ctx), \
- }; \
- !__s.done; \
- ({ \
- __s.t += __perf_get_time(ctx); \
- __s.done = true; \
- if (__s.t > (limit_ns)) { \
- perf_debug_ctx(ctx, fmt " (%.03f ms)", ##__VA_ARGS__, (double)__s.t / 1000000.0); \
- } \
- }))
-
-#define perf_time(limit_ns, fmt, ...) perf_time_ctx(NULL, limit_ns, fmt, ##__VA_ARGS__)
+#define perf_time_ctx(ctx, limit_ns, fmt, ...) \
+ for (struct __perf_time_state __s = \
+ { \
+ .t = -__perf_get_time(ctx), \
+ }; \
+ !__s.done; ({ \
+ __s.t += __perf_get_time(ctx); \
+ __s.done = true; \
+ if (__s.t > (limit_ns)) { \
+ perf_debug_ctx(ctx, fmt " (%.03f ms)", ##__VA_ARGS__, \
+ (double)__s.t / 1000000.0); \
+ } \
+ }))
+
+#define perf_time(limit_ns, fmt, ...) \
+ perf_time_ctx(NULL, limit_ns, fmt, ##__VA_ARGS__)
struct __perf_time_state {
- int64_t t;
- bool done;
+ int64_t t;
+ bool done;
};
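As a usage sketch (hypothetical call site, not part of this patch): because the macro is a for-header, the timed body runs exactly once, and a perf warning is logged only when the elapsed time exceeds the limit:

   /* hypothetical: warn if a flush takes longer than 1ms (1000000 ns) */
   perf_time(1000000, "long flush") {
      do_flush();   /* placeholder for the timed work */
   }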
/* static inline would be nice here, except 'struct fd_context' is not
* defined yet:
*/
-#define __perf_get_time(ctx) \
- ((FD_DBG(PERF) || \
- ({ struct fd_context *__c = (ctx); \
- unlikely(__c && __c->debug.debug_message); })) ? \
- os_time_get_nano() : 0)
+#define __perf_get_time(ctx) \
+ ((FD_DBG(PERF) || ({ \
+ struct fd_context *__c = (ctx); \
+ unlikely(__c && __c->debug.debug_message); \
+ })) \
+ ? os_time_get_nano() \
+ : 0)
struct fd_context;
*/
static inline void
fd_context_access_begin(struct fd_context *ctx)
- acquire_cap(fd_context_access_cap)
+ acquire_cap(fd_context_access_cap)
{
}
static inline void
-fd_context_access_end(struct fd_context *ctx)
- release_cap(fd_context_access_cap)
+fd_context_access_end(struct fd_context *ctx) release_cap(fd_context_access_cap)
{
}
-
/* for conditionally setting boolean flag(s): */
#define COND(bool, val) ((bool) ? (val) : 0)
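A one-line illustration of COND() (flag names hypothetical):

   reg |= COND(blend_enabled, BLEND_ENABLE_BIT); /* ORs in the bit only when blend_enabled */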
#define CP_REG(reg) ((0x4 << 16) | ((unsigned int)((reg) - (0x2000))))
-static inline uint32_t DRAW(enum pc_di_primtype prim_type,
- enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
- enum pc_di_vis_cull_mode vis_cull_mode,
- uint8_t instances)
+static inline uint32_t
+DRAW(enum pc_di_primtype prim_type, enum pc_di_src_sel source_select,
+ enum pc_di_index_size index_size, enum pc_di_vis_cull_mode vis_cull_mode,
+ uint8_t instances)
{
- return (prim_type << 0) |
- (source_select << 6) |
- ((index_size & 1) << 11) |
- ((index_size >> 1) << 13) |
- (vis_cull_mode << 9) |
- (1 << 14) |
- (instances << 24);
+ return (prim_type << 0) | (source_select << 6) | ((index_size & 1) << 11) |
+ ((index_size >> 1) << 13) | (vis_cull_mode << 9) | (1 << 14) |
+ (instances << 24);
}
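Spelled out, the dword layout DRAW() produces (read off from the shifts above) is:

   /* [5:0]   prim_type
    * [8:6]   source_select
    * [10:9]  vis_cull_mode
    * [11]    index_size bit 0
    * [13]    index_size bit 1
    * [14]    constant 1
    * [31:24] instances
    */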
-static inline uint32_t DRAW_A20X(enum pc_di_primtype prim_type,
- enum pc_di_face_cull_sel faceness_cull_select,
- enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
- bool pre_fetch_cull_enable,
- bool grp_cull_enable,
- uint16_t count)
+static inline uint32_t
+DRAW_A20X(enum pc_di_primtype prim_type,
+ enum pc_di_face_cull_sel faceness_cull_select,
+ enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
+ bool pre_fetch_cull_enable, bool grp_cull_enable, uint16_t count)
{
- return (prim_type << 0) |
- (source_select << 6) |
- (faceness_cull_select << 8) |
- ((index_size & 1) << 11) |
- ((index_size >> 1) << 13) |
- (pre_fetch_cull_enable << 14) |
- (grp_cull_enable << 15) |
- (count << 16);
+ return (prim_type << 0) | (source_select << 6) |
+ (faceness_cull_select << 8) | ((index_size & 1) << 11) |
+ ((index_size >> 1) << 13) | (pre_fetch_cull_enable << 14) |
+ (grp_cull_enable << 15) | (count << 16);
}
/* for tracking cmdstream positions that need to be patched: */
struct fd_cs_patch {
- uint32_t *cs;
- uint32_t val;
+ uint32_t *cs;
+ uint32_t val;
};
#define fd_patch_num_elements(buf) ((buf)->size / sizeof(struct fd_cs_patch))
-#define fd_patch_element(buf, i) util_dynarray_element(buf, struct fd_cs_patch, i)
+#define fd_patch_element(buf, i) \
+ util_dynarray_element(buf, struct fd_cs_patch, i)
static inline enum pipe_format
pipe_surface_format(struct pipe_surface *psurf)
{
- if (!psurf)
- return PIPE_FORMAT_NONE;
- return psurf->format;
+ if (!psurf)
+ return PIPE_FORMAT_NONE;
+ return psurf->format;
}
static inline bool
fd_surface_half_precision(const struct pipe_surface *psurf)
{
- enum pipe_format format;
+ enum pipe_format format;
- if (!psurf)
- return true;
+ if (!psurf)
+ return true;
- format = psurf->format;
+ format = psurf->format;
- /* colors are provided in consts, which go through cov.f32f16, which will
- * break these values
- */
- if (util_format_is_pure_integer(format))
- return false;
+ /* colors are provided in consts, which go through cov.f32f16, which will
+ * break these values
+ */
+ if (util_format_is_pure_integer(format))
+ return false;
- /* avoid losing precision on 32-bit float formats */
- if (util_format_is_float(format) &&
- util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 32)
- return false;
+ /* avoid losing precision on 32-bit float formats */
+ if (util_format_is_float(format) &&
+ util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) ==
+ 32)
+ return false;
- return true;
+ return true;
}
static inline unsigned
fd_sampler_first_level(const struct pipe_sampler_view *view)
{
- if (view->target == PIPE_BUFFER)
- return 0;
- return view->u.tex.first_level;
+ if (view->target == PIPE_BUFFER)
+ return 0;
+ return view->u.tex.first_level;
}
static inline unsigned
fd_sampler_last_level(const struct pipe_sampler_view *view)
{
- if (view->target == PIPE_BUFFER)
- return 0;
- return view->u.tex.last_level;
+ if (view->target == PIPE_BUFFER)
+ return 0;
+ return view->u.tex.last_level;
}
static inline bool
fd_half_precision(struct pipe_framebuffer_state *pfb)
{
- unsigned i;
+ unsigned i;
- for (i = 0; i < pfb->nr_cbufs; i++)
- if (!fd_surface_half_precision(pfb->cbufs[i]))
- return false;
+ for (i = 0; i < pfb->nr_cbufs; i++)
+ if (!fd_surface_half_precision(pfb->cbufs[i]))
+ return false;
- return true;
+ return true;
}
static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx);
/* like OUT_RING() but appends a cmdstream patch point to 'buf' */
static inline void
-OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data,
- struct util_dynarray *buf)
+OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, struct util_dynarray *buf)
{
- if (LOG_DWORDS) {
- DBG("ring[%p]: OUT_RINGP %04x: %08x", ring,
- (uint32_t)(ring->cur - ring->start), data);
- }
- util_dynarray_append(buf, struct fd_cs_patch, ((struct fd_cs_patch){
- .cs = ring->cur++,
- .val = data,
- }));
+ if (LOG_DWORDS) {
+ DBG("ring[%p]: OUT_RINGP %04x: %08x", ring,
+ (uint32_t)(ring->cur - ring->start), data);
+ }
+ util_dynarray_append(buf, struct fd_cs_patch,
+ ((struct fd_cs_patch){
+ .cs = ring->cur++,
+ .val = data,
+ }));
}
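For reference, a minimal sketch of how such patch points are typically consumed later, using the fd_patch_num_elements()/fd_patch_element() helpers defined above (the fixup itself, here a plain 'bias', is hypothetical; real call sites apply their own adjustment):

   static inline void
   apply_patches(struct util_dynarray *buf, uint32_t bias)
   {
      for (unsigned i = 0; i < fd_patch_num_elements(buf); i++) {
         struct fd_cs_patch *patch = fd_patch_element(buf, i);
         *patch->cs = patch->val + bias; /* rewrite the dword reserved by OUT_RINGP */
      }
   }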
static inline void
-__OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target)
+__OUT_IB(struct fd_ringbuffer *ring, bool prefetch,
+ struct fd_ringbuffer *target)
{
- if (target->cur == target->start)
- return;
-
- unsigned count = fd_ringbuffer_cmd_count(target);
-
- /* for debug after a lock up, write a unique counter value
- * to scratch6 for each IB, to make it easier to match up
- * register dumps to cmdstream. The combination of IB and
- * DRAW (scratch7) is enough to "triangulate" the particular
- * draw that caused lockup.
- */
- emit_marker(ring, 6);
-
- for (unsigned i = 0; i < count; i++) {
- uint32_t dwords;
- OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
- dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
- assert(dwords > 0);
- OUT_RING(ring, dwords);
- OUT_PKT2(ring);
- }
-
- emit_marker(ring, 6);
+ if (target->cur == target->start)
+ return;
+
+ unsigned count = fd_ringbuffer_cmd_count(target);
+
+ /* for debug after a lock up, write a unique counter value
+ * to scratch6 for each IB, to make it easier to match up
+ * register dumps to cmdstream. The combination of IB and
+ * DRAW (scratch7) is enough to "triangulate" the particular
+ * draw that caused lockup.
+ */
+ emit_marker(ring, 6);
+
+ for (unsigned i = 0; i < count; i++) {
+ uint32_t dwords;
+ OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD,
+ 2);
+ dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
+ assert(dwords > 0);
+ OUT_RING(ring, dwords);
+ OUT_PKT2(ring);
+ }
+
+ emit_marker(ring, 6);
}
static inline void
__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
- if (target->cur == target->start)
- return;
-
- unsigned count = fd_ringbuffer_cmd_count(target);
-
- for (unsigned i = 0; i < count; i++) {
- uint32_t dwords;
- OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
- dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
- assert(dwords > 0);
- OUT_RING(ring, dwords);
- }
+ if (target->cur == target->start)
+ return;
+
+ unsigned count = fd_ringbuffer_cmd_count(target);
+
+ for (unsigned i = 0; i < count; i++) {
+ uint32_t dwords;
+ OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
+ dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
+ assert(dwords > 0);
+ OUT_RING(ring, dwords);
+ }
}
/* CP_SCRATCH_REG4 is used to hold base address for query results: */
#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4
#ifdef DEBUG
-# define __EMIT_MARKER 1
+#define __EMIT_MARKER 1
#else
-# define __EMIT_MARKER 0
+#define __EMIT_MARKER 0
#endif
static inline void
emit_marker(struct fd_ringbuffer *ring, int scratch_idx)
{
- extern int32_t marker_cnt;
- unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx;
- assert(reg != HW_QUERY_BASE_REG);
- if (reg == HW_QUERY_BASE_REG)
- return;
- if (__EMIT_MARKER) {
- OUT_WFI5(ring);
- OUT_PKT0(ring, reg, 1);
- OUT_RING(ring, p_atomic_inc_return(&marker_cnt));
- }
+ extern int32_t marker_cnt;
+ unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx;
+ assert(reg != HW_QUERY_BASE_REG);
+ if (reg == HW_QUERY_BASE_REG)
+ return;
+ if (__EMIT_MARKER) {
+ OUT_WFI5(ring);
+ OUT_PKT0(ring, reg, 1);
+ OUT_RING(ring, p_atomic_inc_return(&marker_cnt));
+ }
}
static inline uint32_t
pack_rgba(enum pipe_format format, const float *rgba)
{
- union util_color uc;
- util_pack_color(rgba, format, &uc);
- return uc.ui[0];
+ union util_color uc;
+ util_pack_color(rgba, format, &uc);
+ return uc.ui[0];
}
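For example (assuming a little-endian host):

   const float red[4] = {1.0f, 0.0f, 0.0f, 1.0f};
   uint32_t c = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, red); /* c == 0xff0000ff */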
/*
* swap - swap value of @a and @b
*/
-#define swap(a, b) \
- do { __typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+#define swap(a, b) \
+ do { \
+ __typeof(a) __tmp = (a); \
+ (a) = (b); \
+ (b) = __tmp; \
+ } while (0)
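Trivial usage (note that __typeof is a GNU extension, so this assumes gcc/clang):

   int a = 1, b = 2;
   swap(a, b); /* now a == 2, b == 1 */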
#define BIT(bit) (1u << (bit))
static inline enum a3xx_msaa_samples
fd_msaa_samples(unsigned samples)
{
- switch (samples) {
- default:
- debug_assert(0);
- FALLTHROUGH;
- case 0:
- case 1: return MSAA_ONE;
- case 2: return MSAA_TWO;
- case 4: return MSAA_FOUR;
- case 8: return MSAA_EIGHT;
- }
+ switch (samples) {
+ default:
+ debug_assert(0);
+ FALLTHROUGH;
+ case 0:
+ case 1:
+ return MSAA_ONE;
+ case 2:
+ return MSAA_TWO;
+ case 4:
+ return MSAA_FOUR;
+ case 8:
+ return MSAA_EIGHT;
+ }
}
/*
static inline enum a4xx_state_block
fd4_stage2shadersb(gl_shader_stage type)
{
- switch (type) {
- case MESA_SHADER_VERTEX:
- return SB4_VS_SHADER;
- case MESA_SHADER_FRAGMENT:
- return SB4_FS_SHADER;
- case MESA_SHADER_COMPUTE:
- case MESA_SHADER_KERNEL:
- return SB4_CS_SHADER;
- default:
- unreachable("bad shader type");
- return ~0;
- }
+ switch (type) {
+ case MESA_SHADER_VERTEX:
+ return SB4_VS_SHADER;
+ case MESA_SHADER_FRAGMENT:
+ return SB4_FS_SHADER;
+ case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_KERNEL:
+ return SB4_CS_SHADER;
+ default:
+ unreachable("bad shader type");
+ return ~0;
+ }
}
static inline enum a4xx_index_size
fd4_size2indextype(unsigned index_size)
{
- switch (index_size) {
- case 1: return INDEX4_SIZE_8_BIT;
- case 2: return INDEX4_SIZE_16_BIT;
- case 4: return INDEX4_SIZE_32_BIT;
- }
- DBG("unsupported index size: %d", index_size);
- assert(0);
- return INDEX4_SIZE_32_BIT;
+ switch (index_size) {
+ case 1:
+ return INDEX4_SIZE_8_BIT;
+ case 2:
+ return INDEX4_SIZE_16_BIT;
+ case 4:
+ return INDEX4_SIZE_32_BIT;
+ }
+ DBG("unsupported index size: %d", index_size);
+ assert(0);
+ return INDEX4_SIZE_32_BIT;
}
#endif /* FREEDRENO_UTIL_H_ */
/* clang-format on */
struct gpu_info {
- const char *name;
- uint32_t gpu_id;
- uint8_t gmem_page_align;
- uint32_t gmemsize_bytes;
+ const char *name;
+ uint32_t gpu_id;
+ uint8_t gmem_page_align;
+ uint32_t gmemsize_bytes;
};
-#define SZ_128K 0x00020000
-#define SZ_256K 0x00040000
-#define SZ_512K 0x00080000
-#define SZ_1M 0x00100000
+#define SZ_128K 0x00020000
+#define SZ_256K 0x00040000
+#define SZ_512K 0x00080000
+#define SZ_1M 0x00100000
/* keep sorted by gpu name: */
static const struct gpu_info gpu_infos[] = {
- { "a306", 307, 4, SZ_128K },
- { "a405", 405, 4, SZ_256K },
- { "a530", 530, 4, SZ_1M },
- { "a618", 618, 1, SZ_512K },
- { "a630", 630, 1, SZ_1M },
- { "a650", 630, 1, SZ_1M + SZ_128K },
+ {"a306", 307, 4, SZ_128K}, {"a405", 405, 4, SZ_256K},
+ {"a530", 530, 4, SZ_1M}, {"a618", 618, 1, SZ_512K},
+ {"a630", 630, 1, SZ_1M}, {"a650", 630, 1, SZ_1M + SZ_128K},
};
-
static const struct option opts[] = {
- { .name = "gpu", .has_arg = 1, NULL, 'g' },
- { .name = "help", .has_arg = 0, NULL, 'h' },
- { .name = "verbose", .has_arg = 0, NULL, 'v' },
- {}
-};
+ {.name = "gpu", .has_arg = 1, NULL, 'g'},
+ {.name = "help", .has_arg = 0, NULL, 'h'},
+ {.name = "verbose", .has_arg = 0, NULL, 'v'},
+ {}};
static void
usage(void)
{
- fprintf(stderr, "Usage:\n\n"
- "\tgmemtool [-hv] [-g GPU]\n\n"
- "Options:\n"
- "\t-g, --gpu=GPU - use GMEM size/alignment/etc settings for the specified GPU\n"
- "\t-h, --help - this usage message\n"
- "\t-v, --verbose - dump more verbose output\n"
- "\n"
- );
- fprintf(stderr, "Where GPU is one of:\n");
- for (int i = 0; i < ARRAY_SIZE(gpu_infos); i++)
- fprintf(stderr, "\t%s\n", gpu_infos[i].name);
- exit(2);
+ fprintf(stderr, "Usage:\n\n"
+ "\tgmemtool [-hv] [-g GPU]\n\n"
+ "Options:\n"
+ "\t-g, --gpu=GPU - use GMEM size/alignment/etc settings "
+ "for the specified GPU\n"
+ "\t-h, --help - this usage message\n"
+ "\t-v, --verbose - dump more verbose output\n"
+ "\n");
+ fprintf(stderr, "Where GPU is one of:\n");
+ for (int i = 0; i < ARRAY_SIZE(gpu_infos); i++)
+ fprintf(stderr, "\t%s\n", gpu_infos[i].name);
+ exit(2);
}
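A typical invocation, per the usage text above (output elided):

   gmemtool -g a630 -v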
int
main(int argc, char **argv)
{
- const char *gpu_name = "a630";
- int c;
-
- while ((c = getopt_long(argc, argv, "g:hv", opts, NULL)) != -1) {
- switch (c) {
- case 'g':
- gpu_name = optarg;
- break;
- case 'v':
- bin_debug = true;
- break;
- case 'h':
- default:
- usage();
- }
- }
-
- const struct gpu_info *gpu_info = NULL;
-
- for (int i = 0; i < ARRAY_SIZE(gpu_infos); i++) {
- if (strcmp(gpu_name, gpu_infos[i].name) == 0) {
- gpu_info = &gpu_infos[i];
- break;
- }
- }
-
- if (!gpu_info) {
- printf("unrecognized gpu name: %s\n", gpu_name);
- usage();
- }
-
- /* Setup a fake screen with enough GMEM related configuration
- * to make gmem_stateobj_init() happy:
- */
- struct fd_screen screen = {
- .gpu_id = gpu_info->gpu_id,
- .gmemsize_bytes = gpu_info->gmemsize_bytes,
- };
-
- freedreno_dev_info_init(&screen.info, gpu_info->gpu_id);
-
- /* And finally run thru all the GMEM keys: */
- for (int i = 0; i < ARRAY_SIZE(keys); i++) {
- struct gmem_key key = keys[i];
- key.gmem_page_align = gpu_info->gmem_page_align;
- struct fd_gmem_stateobj *gmem = gmem_stateobj_init(&screen, &key);
- dump_gmem_state(gmem);
-
- assert((gmem->bin_w * gmem->nbins_x) >= key.width);
- assert((gmem->bin_h * gmem->nbins_y) >= key.height);
- assert(gmem->bin_w < screen.info.tile_max_w);
- assert(gmem->bin_h < screen.info.tile_max_h);
-
- ralloc_free(gmem);
- }
-
- return 0;
+ const char *gpu_name = "a630";
+ int c;
+
+ while ((c = getopt_long(argc, argv, "g:hv", opts, NULL)) != -1) {
+ switch (c) {
+ case 'g':
+ gpu_name = optarg;
+ break;
+ case 'v':
+ bin_debug = true;
+ break;
+ case 'h':
+ default:
+ usage();
+ }
+ }
+
+ const struct gpu_info *gpu_info = NULL;
+
+ for (int i = 0; i < ARRAY_SIZE(gpu_infos); i++) {
+ if (strcmp(gpu_name, gpu_infos[i].name) == 0) {
+ gpu_info = &gpu_infos[i];
+ break;
+ }
+ }
+
+ if (!gpu_info) {
+ printf("unrecognized gpu name: %s\n", gpu_name);
+ usage();
+ }
+
+ /* Setup a fake screen with enough GMEM related configuration
+ * to make gmem_stateobj_init() happy:
+ */
+ struct fd_screen screen = {
+ .gpu_id = gpu_info->gpu_id,
+ .gmemsize_bytes = gpu_info->gmemsize_bytes,
+ };
+
+ freedreno_dev_info_init(&screen.info, gpu_info->gpu_id);
+
+ /* And finally run thru all the GMEM keys: */
+ for (int i = 0; i < ARRAY_SIZE(keys); i++) {
+ struct gmem_key key = keys[i];
+ key.gmem_page_align = gpu_info->gmem_page_align;
+ struct fd_gmem_stateobj *gmem = gmem_stateobj_init(&screen, &key);
+ dump_gmem_state(gmem);
+
+ assert((gmem->bin_w * gmem->nbins_x) >= key.width);
+ assert((gmem->bin_h * gmem->nbins_y) >= key.height);
+ assert(gmem->bin_w < screen.info.tile_max_w);
+ assert(gmem->bin_h < screen.info.tile_max_h);
+
+ ralloc_free(gmem);
+ }
+
+ return 0;
}
* Rob Clark <robclark@freedesktop.org>
*/
-#include "util/ralloc.h"
#include "util/hash_table.h"
+#include "util/ralloc.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
#include "ir3_cache.h"
#include "ir3_gallium.h"
-
static uint32_t
key_hash(const void *_key)
{
- const struct ir3_cache_key *key = _key;
- return XXH32(key, sizeof(*key), 0);
+ const struct ir3_cache_key *key = _key;
+ return XXH32(key, sizeof(*key), 0);
}
static bool
key_equals(const void *_a, const void *_b)
{
- const struct ir3_cache_key *a = _a;
- const struct ir3_cache_key *b = _b;
- // TODO we could optimize the shader-variant key comparison by not
- // ignoring has_per_samp.. not really sure if that helps..
- return memcmp(a, b, sizeof(struct ir3_cache_key)) == 0;
+ const struct ir3_cache_key *a = _a;
+ const struct ir3_cache_key *b = _b;
+ // TODO we could optimize the shader-variant key comparison by not
+ // ignoring has_per_samp.. not really sure if that helps..
+ return memcmp(a, b, sizeof(struct ir3_cache_key)) == 0;
}
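Note that both the hash and the comparison above operate on the raw bytes of the whole key, so callers must zero any padding for lookups to behave; a sketch of a safe caller (state-object names hypothetical):

   struct ir3_cache_key key;
   memset(&key, 0, sizeof(key)); /* padding bytes participate in XXH32/memcmp */
   key.vs = vs_state;
   key.fs = fs_state;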
struct ir3_cache {
- /* cache mapping gallium/etc shader state-objs + shader-key to backend
- * specific state-object
- */
- struct hash_table *ht;
+ /* cache mapping gallium/etc shader state-objs + shader-key to backend
+ * specific state-object
+ */
+ struct hash_table *ht;
- const struct ir3_cache_funcs *funcs;
- void *data;
+ const struct ir3_cache_funcs *funcs;
+ void *data;
};
-struct ir3_cache * ir3_cache_create(const struct ir3_cache_funcs *funcs, void *data)
+struct ir3_cache *
+ir3_cache_create(const struct ir3_cache_funcs *funcs, void *data)
{
- struct ir3_cache *cache = rzalloc(NULL, struct ir3_cache);
+ struct ir3_cache *cache = rzalloc(NULL, struct ir3_cache);
- cache->ht = _mesa_hash_table_create(cache, key_hash, key_equals);
- cache->funcs = funcs;
- cache->data = data;
+ cache->ht = _mesa_hash_table_create(cache, key_hash, key_equals);
+ cache->funcs = funcs;
+ cache->data = data;
- return cache;
+ return cache;
}
-void ir3_cache_destroy(struct ir3_cache *cache)
+void
+ir3_cache_destroy(struct ir3_cache *cache)
{
- if (!cache)
- return;
+ if (!cache)
+ return;
- /* _mesa_hash_table_destroy is so *almost* useful.. */
- hash_table_foreach(cache->ht, entry) {
- cache->funcs->destroy_state(cache->data, entry->data);
- }
+ /* _mesa_hash_table_destroy is so *almost* useful.. */
+ hash_table_foreach(cache->ht, entry)
+ {
+ cache->funcs->destroy_state(cache->data, entry->data);
+ }
- ralloc_free(cache);
+ ralloc_free(cache);
}
struct ir3_program_state *
ir3_cache_lookup(struct ir3_cache *cache, const struct ir3_cache_key *key,
- struct pipe_debug_callback *debug)
+ struct pipe_debug_callback *debug)
{
- uint32_t hash = key_hash(key);
- struct hash_entry *entry =
- _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);
-
- if (entry) {
- return entry->data;
- }
-
- if (key->hs)
- debug_assert(key->ds);
-
- struct ir3_shader *shaders[MESA_SHADER_STAGES] = {
- [MESA_SHADER_VERTEX] = ir3_get_shader(key->vs),
- [MESA_SHADER_TESS_CTRL] = ir3_get_shader(key->hs),
- [MESA_SHADER_TESS_EVAL] = ir3_get_shader(key->ds),
- [MESA_SHADER_GEOMETRY] = ir3_get_shader(key->gs),
- [MESA_SHADER_FRAGMENT] = ir3_get_shader(key->fs),
- };
-
- struct ir3_shader_variant *variants[MESA_SHADER_STAGES];
- struct ir3_shader_key shader_key = key->key;
-
- for (gl_shader_stage stage = MESA_SHADER_VERTEX;
- stage < MESA_SHADER_STAGES; stage++) {
- if (shaders[stage]) {
- variants[stage] =
- ir3_shader_variant(shaders[stage], shader_key, false, debug);
- if (!variants[stage])
- return NULL;
- } else {
- variants[stage] = NULL;
- }
- }
-
- struct ir3_compiler *compiler = shaders[MESA_SHADER_VERTEX]->compiler;
- uint32_t safe_constlens = ir3_trim_constlen(variants, compiler);
- shader_key.safe_constlen = true;
-
- for (gl_shader_stage stage = MESA_SHADER_VERTEX;
- stage < MESA_SHADER_STAGES; stage++) {
- if (safe_constlens & (1 << stage)) {
- variants[stage] =
- ir3_shader_variant(shaders[stage], shader_key, false, debug);
- if (!variants[stage])
- return NULL;
- }
- }
-
- struct ir3_shader_variant *bs;
-
- if (ir3_has_binning_vs(&key->key)) {
- shader_key.safe_constlen = !!(safe_constlens & (1 << MESA_SHADER_VERTEX));
- bs = ir3_shader_variant(shaders[MESA_SHADER_VERTEX], key->key, true, debug);
- if (!bs)
- return NULL;
- } else {
- bs = variants[MESA_SHADER_VERTEX];
- }
-
- struct ir3_program_state *state =
- cache->funcs->create_state(cache->data, bs,
- variants[MESA_SHADER_VERTEX],
- variants[MESA_SHADER_TESS_CTRL],
- variants[MESA_SHADER_TESS_EVAL],
- variants[MESA_SHADER_GEOMETRY],
- variants[MESA_SHADER_FRAGMENT],
- &key->key);
- state->key = *key;
-
- /* NOTE: uses copy of key in state obj, because pointer passed by caller
- * is probably on the stack
- */
- _mesa_hash_table_insert_pre_hashed(cache->ht, hash, &state->key, state);
-
- return state;
+ uint32_t hash = key_hash(key);
+ struct hash_entry *entry =
+ _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);
+
+ if (entry) {
+ return entry->data;
+ }
+
+ if (key->hs)
+ debug_assert(key->ds);
+
+ struct ir3_shader *shaders[MESA_SHADER_STAGES] = {
+ [MESA_SHADER_VERTEX] = ir3_get_shader(key->vs),
+ [MESA_SHADER_TESS_CTRL] = ir3_get_shader(key->hs),
+ [MESA_SHADER_TESS_EVAL] = ir3_get_shader(key->ds),
+ [MESA_SHADER_GEOMETRY] = ir3_get_shader(key->gs),
+ [MESA_SHADER_FRAGMENT] = ir3_get_shader(key->fs),
+ };
+
+ struct ir3_shader_variant *variants[MESA_SHADER_STAGES];
+ struct ir3_shader_key shader_key = key->key;
+
+ for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
+ stage++) {
+ if (shaders[stage]) {
+ variants[stage] =
+ ir3_shader_variant(shaders[stage], shader_key, false, debug);
+ if (!variants[stage])
+ return NULL;
+ } else {
+ variants[stage] = NULL;
+ }
+ }
+
+ struct ir3_compiler *compiler = shaders[MESA_SHADER_VERTEX]->compiler;
+ uint32_t safe_constlens = ir3_trim_constlen(variants, compiler);
+ shader_key.safe_constlen = true;
+
+ for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
+ stage++) {
+ if (safe_constlens & (1 << stage)) {
+ variants[stage] =
+ ir3_shader_variant(shaders[stage], shader_key, false, debug);
+ if (!variants[stage])
+ return NULL;
+ }
+ }
+
+ struct ir3_shader_variant *bs;
+
+ if (ir3_has_binning_vs(&key->key)) {
+ shader_key.safe_constlen = !!(safe_constlens & (1 << MESA_SHADER_VERTEX));
+ bs =
+ ir3_shader_variant(shaders[MESA_SHADER_VERTEX], key->key, true, debug);
+ if (!bs)
+ return NULL;
+ } else {
+ bs = variants[MESA_SHADER_VERTEX];
+ }
+
+ struct ir3_program_state *state = cache->funcs->create_state(
+ cache->data, bs, variants[MESA_SHADER_VERTEX],
+ variants[MESA_SHADER_TESS_CTRL], variants[MESA_SHADER_TESS_EVAL],
+ variants[MESA_SHADER_GEOMETRY], variants[MESA_SHADER_FRAGMENT],
+ &key->key);
+ state->key = *key;
+
+ /* NOTE: uses copy of key in state obj, because pointer passed by caller
+ * is probably on the stack
+ */
+ _mesa_hash_table_insert_pre_hashed(cache->ht, hash, &state->key, state);
+
+ return state;
}
/* call when an API level state object is destroyed, to invalidate
* cache entries which reference that state object.
*/
-void ir3_cache_invalidate(struct ir3_cache *cache, void *stobj)
+void
+ir3_cache_invalidate(struct ir3_cache *cache, void *stobj)
{
- if (!cache)
- return;
-
- hash_table_foreach(cache->ht, entry) {
- const struct ir3_cache_key *key = entry->key;
- if ((key->fs == stobj) || (key->vs == stobj) ||
- (key->ds == stobj) || (key->hs == stobj) ||
- (key->gs == stobj)) {
- cache->funcs->destroy_state(cache->data, entry->data);
- _mesa_hash_table_remove(cache->ht, entry);
- return;
- }
- }
+ if (!cache)
+ return;
+
+ hash_table_foreach(cache->ht, entry)
+ {
+ const struct ir3_cache_key *key = entry->key;
+ if ((key->fs == stobj) || (key->vs == stobj) || (key->ds == stobj) ||
+ (key->hs == stobj) || (key->gs == stobj)) {
+ cache->funcs->destroy_state(cache->data, entry->data);
+ _mesa_hash_table_remove(cache->ht, entry);
+ return;
+ }
+ }
}
/* key into program state cache */
struct ir3_cache_key {
- struct ir3_shader_state *vs, *hs, *ds, *gs, *fs; // 5 pointers
- struct ir3_shader_key key; // 7 dwords
+ struct ir3_shader_state *vs, *hs, *ds, *gs, *fs; // 5 pointers
+ struct ir3_shader_key key; // 7 dwords
};
/* per-gen backend program state object should subclass this for its
* allocated on the stack
*/
struct ir3_program_state {
- struct ir3_cache_key key;
+ struct ir3_cache_key key;
};
struct ir3_cache_funcs {
- struct ir3_program_state *(*create_state)(void *data,
- struct ir3_shader_variant *bs, /* binning pass vs */
- struct ir3_shader_variant *vs,
- struct ir3_shader_variant *hs,
- struct ir3_shader_variant *ds,
- struct ir3_shader_variant *gs,
- struct ir3_shader_variant *fs,
- const struct ir3_shader_key *key);
- void (*destroy_state)(void *data, struct ir3_program_state *state);
+ struct ir3_program_state *(*create_state)(
+ void *data, struct ir3_shader_variant *bs, /* binning pass vs */
+ struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
+ struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
+ struct ir3_shader_variant *fs, const struct ir3_shader_key *key);
+ void (*destroy_state)(void *data, struct ir3_program_state *state);
};
struct ir3_cache;
/* construct a shader cache. Free with ralloc_free() */
-struct ir3_cache * ir3_cache_create(const struct ir3_cache_funcs *funcs, void *data);
+struct ir3_cache *ir3_cache_create(const struct ir3_cache_funcs *funcs,
+ void *data);
void ir3_cache_destroy(struct ir3_cache *cache);
/* debug callback is used for shader-db logs in case the lookup triggers
* shader variant compilation.
*/
-struct ir3_program_state * ir3_cache_lookup(struct ir3_cache *cache,
- const struct ir3_cache_key *key,
- struct pipe_debug_callback *debug);
+struct ir3_program_state *ir3_cache_lookup(struct ir3_cache *cache,
+ const struct ir3_cache_key *key,
+ struct pipe_debug_callback *debug);
/* call when an API level state object is destroyed, to invalidate
* cache entries which reference that state object.
* Rob Clark <robclark@freedesktop.org>
*/
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
+#include <err.h>
#include <fcntl.h>
+#include <getopt.h>
#include <stdint.h>
-#include <stdlib.h>
#include <stdio.h>
-#include <err.h>
-#include <getopt.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
#include "nir/tgsi_to_nir.h"
+#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_text.h"
-#include "tgsi/tgsi_dump.h"
+#include "ir3/instr-a3xx.h"
+#include "ir3/ir3.h"
#include "ir3/ir3_compiler.h"
#include "ir3/ir3_gallium.h"
#include "ir3/ir3_nir.h"
-#include "ir3/instr-a3xx.h"
-#include "ir3/ir3.h"
#include "main/mtypes.h"
-#include "compiler/glsl/standalone.h"
-#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/glsl/gl_nir.h"
+#include "compiler/glsl/glsl_to_nir.h"
+#include "compiler/glsl/standalone.h"
#include "compiler/nir_types.h"
#include "compiler/spirv/nir_spirv.h"
static void
dump_info(struct ir3_shader_variant *so, const char *str)
{
- uint32_t *bin;
- const char *type = ir3_shader_stage(so);
- bin = ir3_shader_assemble(so);
- printf("; %s: %s\n", type, str);
- ir3_shader_disasm(so, bin, stdout);
+ uint32_t *bin;
+ const char *type = ir3_shader_stage(so);
+ bin = ir3_shader_assemble(so);
+ printf("; %s: %s\n", type, str);
+ ir3_shader_disasm(so, bin, stdout);
}
static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
{
- nir_foreach_variable_in_list(var, var_list) {
- if (var->data.location > new_var->data.location) {
- exec_node_insert_node_before(&var->node, &new_var->node);
- return;
- }
- }
- exec_list_push_tail(var_list, &new_var->node);
+ nir_foreach_variable_in_list (var, var_list) {
+ if (var->data.location > new_var->data.location) {
+ exec_node_insert_node_before(&var->node, &new_var->node);
+ return;
+ }
+ }
+ exec_list_push_tail(var_list, &new_var->node);
}
static void
sort_varyings(nir_shader *nir, nir_variable_mode mode)
{
- struct exec_list new_list;
- exec_list_make_empty(&new_list);
- nir_foreach_variable_with_modes_safe(var, nir, mode) {
- exec_node_remove(&var->node);
- insert_sorted(&new_list, var);
- }
- exec_list_append(&nir->variables, &new_list);
+ struct exec_list new_list;
+ exec_list_make_empty(&new_list);
+ nir_foreach_variable_with_modes_safe(var, nir, mode)
+ {
+ exec_node_remove(&var->node);
+ insert_sorted(&new_list, var);
+ }
+ exec_list_append(&nir->variables, &new_list);
}
static void
fixup_varying_slots(nir_shader *nir, nir_variable_mode mode)
{
- nir_foreach_variable_with_modes(var, nir, mode) {
- if (var->data.location >= VARYING_SLOT_VAR0) {
- var->data.location += 9;
- } else if ((var->data.location >= VARYING_SLOT_TEX0) &&
- (var->data.location <= VARYING_SLOT_TEX7)) {
- var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0;
- }
- }
+ nir_foreach_variable_with_modes(var, nir, mode)
+ {
+ if (var->data.location >= VARYING_SLOT_VAR0) {
+ var->data.location += 9;
+ } else if ((var->data.location >= VARYING_SLOT_TEX0) &&
+ (var->data.location <= VARYING_SLOT_TEX7)) {
+ var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0;
+ }
+ }
}
static struct ir3_compiler *compiler;
static nir_shader *
-load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage)
+load_glsl(unsigned num_files, char *const *files, gl_shader_stage stage)
{
- static const struct standalone_options options = {
- .glsl_version = 310,
- .do_link = true,
- .lower_precision = true,
- };
- struct gl_shader_program *prog;
- const nir_shader_compiler_options *nir_options =
- ir3_get_compiler_options(compiler);
- static struct gl_context local_ctx;
-
- prog = standalone_compile_shader(&options, num_files, files, &local_ctx);
- if (!prog)
- errx(1, "couldn't parse `%s'", files[0]);
-
- nir_shader *nir = glsl_to_nir(&local_ctx, prog, stage, nir_options);
-
- /* required NIR passes: */
- if (nir_options->lower_all_io_to_temps ||
- nir->info.stage == MESA_SHADER_VERTEX ||
- nir->info.stage == MESA_SHADER_GEOMETRY) {
- NIR_PASS_V(nir, nir_lower_io_to_temporaries,
- nir_shader_get_entrypoint(nir),
- true, true);
- } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
- NIR_PASS_V(nir, nir_lower_io_to_temporaries,
- nir_shader_get_entrypoint(nir),
- true, false);
- }
-
- NIR_PASS_V(nir, nir_lower_global_vars_to_local);
- NIR_PASS_V(nir, nir_split_var_copies);
- NIR_PASS_V(nir, nir_lower_var_copies);
-
- NIR_PASS_V(nir, nir_split_var_copies);
- NIR_PASS_V(nir, nir_lower_var_copies);
- nir_print_shader(nir, stdout);
- NIR_PASS_V(nir, gl_nir_lower_atomics, prog, true);
- NIR_PASS_V(nir, gl_nir_lower_buffers, prog);
- NIR_PASS_V(nir, nir_lower_atomics_to_ssbo);
- nir_print_shader(nir, stdout);
-
- switch (stage) {
- case MESA_SHADER_VERTEX:
- nir_assign_var_locations(nir, nir_var_shader_in,
- &nir->num_inputs,
- ir3_glsl_type_size);
-
- /* Re-lower global vars, to deal with any dead VS inputs. */
- NIR_PASS_V(nir, nir_lower_global_vars_to_local);
-
- sort_varyings(nir, nir_var_shader_out);
- nir_assign_var_locations(nir, nir_var_shader_out,
- &nir->num_outputs,
- ir3_glsl_type_size);
- fixup_varying_slots(nir, nir_var_shader_out);
- break;
- case MESA_SHADER_FRAGMENT:
- sort_varyings(nir, nir_var_shader_in);
- nir_assign_var_locations(nir, nir_var_shader_in,
- &nir->num_inputs,
- ir3_glsl_type_size);
- fixup_varying_slots(nir, nir_var_shader_in);
- nir_assign_var_locations(nir, nir_var_shader_out,
- &nir->num_outputs,
- ir3_glsl_type_size);
- break;
- case MESA_SHADER_COMPUTE:
- case MESA_SHADER_KERNEL:
- break;
- default:
- errx(1, "unhandled shader stage: %d", stage);
- }
-
- nir_assign_var_locations(nir, nir_var_uniform,
- &nir->num_uniforms,
- ir3_glsl_type_size);
-
- NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
-
- NIR_PASS_V(nir, nir_lower_frexp);
- NIR_PASS_V(nir, nir_lower_io,
- nir_var_shader_in | nir_var_shader_out | nir_var_uniform,
- ir3_glsl_type_size, (nir_lower_io_options)0);
- NIR_PASS_V(nir, gl_nir_lower_samplers, prog);
-
- return nir;
+ static const struct standalone_options options = {
+ .glsl_version = 310,
+ .do_link = true,
+ .lower_precision = true,
+ };
+ struct gl_shader_program *prog;
+ const nir_shader_compiler_options *nir_options =
+ ir3_get_compiler_options(compiler);
+ static struct gl_context local_ctx;
+
+ prog = standalone_compile_shader(&options, num_files, files, &local_ctx);
+ if (!prog)
+ errx(1, "couldn't parse `%s'", files[0]);
+
+ nir_shader *nir = glsl_to_nir(&local_ctx, prog, stage, nir_options);
+
+ /* required NIR passes: */
+ if (nir_options->lower_all_io_to_temps ||
+ nir->info.stage == MESA_SHADER_VERTEX ||
+ nir->info.stage == MESA_SHADER_GEOMETRY) {
+ NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+ nir_shader_get_entrypoint(nir), true, true);
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+ nir_shader_get_entrypoint(nir), true, false);
+ }
+
+ NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+ NIR_PASS_V(nir, nir_split_var_copies);
+ NIR_PASS_V(nir, nir_lower_var_copies);
+
+ NIR_PASS_V(nir, nir_split_var_copies);
+ NIR_PASS_V(nir, nir_lower_var_copies);
+ nir_print_shader(nir, stdout);
+ NIR_PASS_V(nir, gl_nir_lower_atomics, prog, true);
+ NIR_PASS_V(nir, gl_nir_lower_buffers, prog);
+ NIR_PASS_V(nir, nir_lower_atomics_to_ssbo);
+ nir_print_shader(nir, stdout);
+
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ nir_assign_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
+ ir3_glsl_type_size);
+
+ /* Re-lower global vars, to deal with any dead VS inputs. */
+ NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+
+ sort_varyings(nir, nir_var_shader_out);
+ nir_assign_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
+ ir3_glsl_type_size);
+ fixup_varying_slots(nir, nir_var_shader_out);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ sort_varyings(nir, nir_var_shader_in);
+ nir_assign_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
+ ir3_glsl_type_size);
+ fixup_varying_slots(nir, nir_var_shader_in);
+ nir_assign_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
+ ir3_glsl_type_size);
+ break;
+ case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_KERNEL:
+ break;
+ default:
+ errx(1, "unhandled shader stage: %d", stage);
+ }
+
+ nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
+ ir3_glsl_type_size);
+
+ NIR_PASS_V(nir, nir_lower_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
+
+ NIR_PASS_V(nir, nir_lower_frexp);
+ NIR_PASS_V(nir, nir_lower_io,
+ nir_var_shader_in | nir_var_shader_out | nir_var_uniform,
+ ir3_glsl_type_size, (nir_lower_io_options)0);
+ NIR_PASS_V(nir, gl_nir_lower_samplers, prog);
+
+ return nir;
}
static int
read_file(const char *filename, void **ptr, size_t *size)
{
- int fd, ret;
- struct stat st;
+ int fd, ret;
+ struct stat st;
- *ptr = MAP_FAILED;
+ *ptr = MAP_FAILED;
- fd = open(filename, O_RDONLY);
- if (fd == -1) {
- warnx("couldn't open `%s'", filename);
- return 1;
- }
+ fd = open(filename, O_RDONLY);
+ if (fd == -1) {
+ warnx("couldn't open `%s'", filename);
+ return 1;
+ }
- ret = fstat(fd, &st);
- if (ret)
- errx(1, "couldn't stat `%s'", filename);
+ ret = fstat(fd, &st);
+ if (ret)
+ errx(1, "couldn't stat `%s'", filename);
- *size = st.st_size;
- *ptr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
- if (*ptr == MAP_FAILED)
- errx(1, "couldn't map `%s'", filename);
+ *size = st.st_size;
+ *ptr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (*ptr == MAP_FAILED)
+ errx(1, "couldn't map `%s'", filename);
- close(fd);
+ close(fd);
- return 0;
+ return 0;
}
-static void debug_func(void *priv, enum nir_spirv_debug_level level,
- size_t spirv_offset, const char *message)
+static void
+debug_func(void *priv, enum nir_spirv_debug_level level, size_t spirv_offset,
+ const char *message)
{
-// printf("%s\n", message);
+ // printf("%s\n", message);
}
static nir_shader *
load_spirv(const char *filename, const char *entry, gl_shader_stage stage)
{
- const struct spirv_to_nir_options spirv_options = {
- /* these caps are just make-believe */
- .caps = {
- .draw_parameters = true,
- .float64 = true,
- .image_read_without_format = true,
- .image_write_without_format = true,
- .int64 = true,
- .variable_pointers = true,
- },
- .debug = {
- .func = debug_func,
- }
- };
- nir_shader *nir;
- void *buf;
- size_t size;
-
- read_file(filename, &buf, &size);
-
- nir = spirv_to_nir(buf, size / 4,
- NULL, 0, /* spec_entries */
- stage, entry,
- &spirv_options,
- ir3_get_compiler_options(compiler));
-
- nir_print_shader(nir, stdout);
-
- return nir;
+ const struct spirv_to_nir_options spirv_options = {
+ /* these caps are just make-believe */
+ .caps =
+ {
+ .draw_parameters = true,
+ .float64 = true,
+ .image_read_without_format = true,
+ .image_write_without_format = true,
+ .int64 = true,
+ .variable_pointers = true,
+ },
+ .debug = {
+ .func = debug_func,
+ }};
+ nir_shader *nir;
+ void *buf;
+ size_t size;
+
+ read_file(filename, &buf, &size);
+
+ nir = spirv_to_nir(buf, size / 4, NULL, 0, /* spec_entries */
+ stage, entry, &spirv_options,
+ ir3_get_compiler_options(compiler));
+
+ nir_print_shader(nir, stdout);
+
+ return nir;
}
static const char *shortopts = "g:hv";
static const struct option longopts[] = {
- { "gpu", required_argument, 0, 'g' },
- { "help", no_argument, 0, 'h' },
- { "verbose", no_argument, 0, 'v' },
+ {"gpu", required_argument, 0, 'g'},
+ {"help", no_argument, 0, 'h'},
+ {"verbose", no_argument, 0, 'v'},
};
static void
print_usage(void)
{
- printf("Usage: ir3_compiler [OPTIONS]... <file.tgsi | file.spv entry_point | (file.vert | file.frag)*>\n");
- printf(" -g, --gpu GPU_ID - specify gpu-id (default 320)\n");
- printf(" -h, --help - show this message\n");
- printf(" -v, --verbose - verbose compiler/debug messages\n");
+ printf("Usage: ir3_compiler [OPTIONS]... <file.tgsi | file.spv entry_point "
+ "| (file.vert | file.frag)*>\n");
+ printf(" -g, --gpu GPU_ID - specify gpu-id (default 320)\n");
+ printf(" -h, --help - show this message\n");
+ printf(" -v, --verbose - verbose compiler/debug messages\n");
}
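For instance, compiling and linking a VS/FS pair in GLSL mode (file names hypothetical):

   ir3_compiler --gpu 630 --verbose shader.vert shader.frag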
int
main(int argc, char **argv)
{
- int ret = 0, opt;
- char *filenames[2];
- int num_files = 0;
- unsigned stage = 0;
- struct ir3_shader_key key = {};
- unsigned gpu_id = 320;
- const char *info;
- const char *spirv_entry = NULL;
- void *ptr;
- bool from_tgsi = false;
- size_t size;
-
- while ((opt = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
- switch (opt) {
- case 'g':
- gpu_id = strtol(optarg, NULL, 0);
- break;
- case 'v':
- ir3_shader_debug |= IR3_DBG_OPTMSGS | IR3_DBG_DISASM;
- break;
- default:
- printf("unrecognized arg: %c\n", opt);
- /* fallthrough */
- case 'h':
- print_usage();
- return 0;
- }
- }
-
- if (optind >= argc) {
- fprintf(stderr, "no file specified!\n");
- print_usage();
- return 0;
- }
-
- unsigned n = optind;
- while (n < argc) {
- char *filename = argv[n];
- char *ext = strrchr(filename, '.');
-
- if (strcmp(ext, ".tgsi") == 0) {
- if (num_files != 0)
- errx(1, "in TGSI mode, only a single file may be specified");
- from_tgsi = true;
- } else if (strcmp(ext, ".spv") == 0) {
- if (num_files != 0)
- errx(1, "in SPIR-V mode, only a single file may be specified");
- stage = MESA_SHADER_COMPUTE;
- filenames[num_files++] = filename;
- n++;
- if (n == argc)
- errx(1, "in SPIR-V mode, an entry point must be specified");
- spirv_entry = argv[n];
- n++;
- } else if (strcmp(ext, ".comp") == 0) {
- if (from_tgsi || spirv_entry)
- errx(1, "cannot mix GLSL/TGSI/SPIRV");
- if (num_files >= ARRAY_SIZE(filenames))
- errx(1, "too many GLSL files");
- stage = MESA_SHADER_COMPUTE;
- } else if (strcmp(ext, ".frag") == 0) {
- if (from_tgsi || spirv_entry)
- errx(1, "cannot mix GLSL/TGSI/SPIRV");
- if (num_files >= ARRAY_SIZE(filenames))
- errx(1, "too many GLSL files");
- stage = MESA_SHADER_FRAGMENT;
- } else if (strcmp(ext, ".vert") == 0) {
- if (from_tgsi)
- errx(1, "cannot mix GLSL and TGSI");
- if (num_files >= ARRAY_SIZE(filenames))
- errx(1, "too many GLSL files");
- stage = MESA_SHADER_VERTEX;
- } else {
- print_usage();
- return -1;
- }
-
- filenames[num_files++] = filename;
-
- n++;
- }
-
- nir_shader *nir;
-
- compiler = ir3_compiler_create(NULL, gpu_id);
-
- if (from_tgsi) {
- struct tgsi_token toks[65536];
- const nir_shader_compiler_options *nir_options =
- ir3_get_compiler_options(compiler);
-
- ret = read_file(filenames[0], &ptr, &size);
- if (ret) {
- print_usage();
- return ret;
- }
-
- if (ir3_shader_debug & IR3_DBG_OPTMSGS)
- printf("%s\n", (char *)ptr);
-
- if (!tgsi_text_translate(ptr, toks, ARRAY_SIZE(toks)))
- errx(1, "could not parse `%s'", filenames[0]);
-
- if (ir3_shader_debug & IR3_DBG_OPTMSGS)
- tgsi_dump(toks, 0);
-
- nir = tgsi_to_nir_noscreen(toks, nir_options);
- NIR_PASS_V(nir, nir_lower_global_vars_to_local);
- } else if (spirv_entry) {
- nir = load_spirv(filenames[0], spirv_entry, stage);
-
- NIR_PASS_V(nir, nir_lower_io,
- nir_var_shader_in | nir_var_shader_out,
- ir3_glsl_type_size, (nir_lower_io_options)0);
-
- /* TODO do this somewhere else */
- nir_lower_int64(nir);
- nir_lower_system_values(nir);
- nir_lower_compute_system_values(nir, NULL);
- } else if (num_files > 0) {
- nir = load_glsl(num_files, filenames, stage);
- } else {
- print_usage();
- return -1;
- }
-
- ir3_finalize_nir(compiler, nir);
- ir3_nir_post_finalize(compiler, nir);
-
- struct ir3_shader *shader = rzalloc_size(NULL, sizeof(*shader));
- shader->compiler = compiler;
- shader->type = stage;
- shader->nir = nir;
-
- struct ir3_shader_variant *v = rzalloc_size(shader, sizeof(*v));
- v->type = shader->type;
- v->shader = shader;
- v->key = key;
- v->const_state = rzalloc_size(v, sizeof(*v->const_state));
-
- shader->variants = v;
- shader->variant_count = 1;
-
- ir3_nir_lower_variant(v, nir);
-
- info = "NIR compiler";
- ret = ir3_compile_shader_nir(compiler, v);
- if (ret) {
- fprintf(stderr, "compiler failed!\n");
- return ret;
- }
- dump_info(v, info);
-
- return 0;
+ int ret = 0, opt;
+ char *filenames[2];
+ int num_files = 0;
+ unsigned stage = 0;
+ struct ir3_shader_key key = {};
+ unsigned gpu_id = 320;
+ const char *info;
+ const char *spirv_entry = NULL;
+ void *ptr;
+ bool from_tgsi = false;
+ size_t size;
+
+ while ((opt = getopt_long_only(argc, argv, shortopts, longopts, NULL)) !=
+ -1) {
+ switch (opt) {
+ case 'g':
+ gpu_id = strtol(optarg, NULL, 0);
+ break;
+ case 'v':
+ ir3_shader_debug |= IR3_DBG_OPTMSGS | IR3_DBG_DISASM;
+ break;
+ default:
+ printf("unrecognized arg: %c\n", opt);
+ /* fallthrough */
+ case 'h':
+ print_usage();
+ return 0;
+ }
+ }
+
+ if (optind >= argc) {
+ fprintf(stderr, "no file specified!\n");
+ print_usage();
+ return 0;
+ }
+
+ unsigned n = optind;
+ while (n < argc) {
+ char *filename = argv[n];
+ char *ext = strrchr(filename, '.');
+
+ if (strcmp(ext, ".tgsi") == 0) {
+ if (num_files != 0)
+ errx(1, "in TGSI mode, only a single file may be specified");
+ from_tgsi = true;
+ } else if (strcmp(ext, ".spv") == 0) {
+ if (num_files != 0)
+ errx(1, "in SPIR-V mode, only a single file may be specified");
+ stage = MESA_SHADER_COMPUTE;
+ filenames[num_files++] = filename;
+ n++;
+ if (n == argc)
+ errx(1, "in SPIR-V mode, an entry point must be specified");
+ spirv_entry = argv[n];
+ n++;
+ } else if (strcmp(ext, ".comp") == 0) {
+ if (from_tgsi || spirv_entry)
+ errx(1, "cannot mix GLSL/TGSI/SPIRV");
+ if (num_files >= ARRAY_SIZE(filenames))
+ errx(1, "too many GLSL files");
+ stage = MESA_SHADER_COMPUTE;
+ } else if (strcmp(ext, ".frag") == 0) {
+ if (from_tgsi || spirv_entry)
+ errx(1, "cannot mix GLSL/TGSI/SPIRV");
+ if (num_files >= ARRAY_SIZE(filenames))
+ errx(1, "too many GLSL files");
+ stage = MESA_SHADER_FRAGMENT;
+ } else if (strcmp(ext, ".vert") == 0) {
+ if (from_tgsi)
+ errx(1, "cannot mix GLSL and TGSI");
+ if (num_files >= ARRAY_SIZE(filenames))
+ errx(1, "too many GLSL files");
+ stage = MESA_SHADER_VERTEX;
+ } else {
+ print_usage();
+ return -1;
+ }
+
+ filenames[num_files++] = filename;
+
+ n++;
+ }
+
+ nir_shader *nir;
+
+ compiler = ir3_compiler_create(NULL, gpu_id);
+
+ if (from_tgsi) {
+ struct tgsi_token toks[65536];
+ const nir_shader_compiler_options *nir_options =
+ ir3_get_compiler_options(compiler);
+
+ ret = read_file(filenames[0], &ptr, &size);
+ if (ret) {
+ print_usage();
+ return ret;
+ }
+
+ if (ir3_shader_debug & IR3_DBG_OPTMSGS)
+ printf("%s\n", (char *)ptr);
+
+ if (!tgsi_text_translate(ptr, toks, ARRAY_SIZE(toks)))
+ errx(1, "could not parse `%s'", filenames[0]);
+
+ if (ir3_shader_debug & IR3_DBG_OPTMSGS)
+ tgsi_dump(toks, 0);
+
+ nir = tgsi_to_nir_noscreen(toks, nir_options);
+ NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+ } else if (spirv_entry) {
+ nir = load_spirv(filenames[0], spirv_entry, stage);
+
+ NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
+ ir3_glsl_type_size, (nir_lower_io_options)0);
+
+ /* TODO do this somewhere else */
+ nir_lower_int64(nir);
+ nir_lower_system_values(nir);
+ nir_lower_compute_system_values(nir, NULL);
+ } else if (num_files > 0) {
+ nir = load_glsl(num_files, filenames, stage);
+ } else {
+ print_usage();
+ return -1;
+ }
+
+ ir3_finalize_nir(compiler, nir);
+ ir3_nir_post_finalize(compiler, nir);
+
+ struct ir3_shader *shader = rzalloc_size(NULL, sizeof(*shader));
+ shader->compiler = compiler;
+ shader->type = stage;
+ shader->nir = nir;
+
+ struct ir3_shader_variant *v = rzalloc_size(shader, sizeof(*v));
+ v->type = shader->type;
+ v->shader = shader;
+ v->key = key;
+ v->const_state = rzalloc_size(v, sizeof(*v->const_state));
+
+ shader->variants = v;
+ shader->variant_count = 1;
+
+ ir3_nir_lower_variant(v, nir);
+
+ info = "NIR compiler";
+ ret = ir3_compile_shader_nir(compiler, v);
+ if (ret) {
+ fprintf(stderr, "compiler failed!\n");
+ return ret;
+ }
+ dump_info(v, info);
+
+ return 0;
}
static bool is_stateobj(struct fd_ringbuffer *ring);
static void emit_const_user(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t regid,
- uint32_t size, const uint32_t *user_buffer);
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t size, const uint32_t *user_buffer);
static void emit_const_bo(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t regid,
- uint32_t offset, uint32_t size,
- struct fd_bo *bo);
-
-static void emit_const_prsc(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t regid,
- uint32_t offset, uint32_t size,
- struct pipe_resource *buffer)
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t offset, uint32_t size, struct fd_bo *bo);
+
+static void
+emit_const_prsc(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
+ uint32_t regid, uint32_t offset, uint32_t size,
+ struct pipe_resource *buffer)
{
- struct fd_resource *rsc = fd_resource(buffer);
- emit_const_bo(ring, v, regid, offset, size, rsc->bo);
+ struct fd_resource *rsc = fd_resource(buffer);
+ emit_const_bo(ring, v, regid, offset, size, rsc->bo);
}
static void emit_const_ptrs(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t dst_offset,
- uint32_t num, struct fd_bo **bos, uint32_t *offsets);
+ const struct ir3_shader_variant *v,
+ uint32_t dst_offset, uint32_t num,
+ struct fd_bo **bos, uint32_t *offsets);
static void
emit_const_asserts(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v,
- uint32_t regid, uint32_t sizedwords)
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t sizedwords)
{
- assert((regid % 4) == 0);
- assert((sizedwords % 4) == 0);
- assert(regid + sizedwords <= v->constlen * 4);
+ assert((regid % 4) == 0);
+ assert((sizedwords % 4) == 0);
+ assert(regid + sizedwords <= v->constlen * 4);
}
static void
-ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring)
- assert_dt
+ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
- /* when we emit const state via ring (IB2) we need a WFI, but when
- * it is emit'd via stateobj, we don't
- */
- if (is_stateobj(ring))
- return;
+ /* when we emit const state via ring (IB2) we need a WFI, but when
+ * it is emit'd via stateobj, we don't
+ */
+ if (is_stateobj(ring))
+ return;
- fd_wfi(batch, ring);
+ fd_wfi(batch, ring);
}
/**
* Returns size in dwords.
*/
static inline void
-ir3_user_consts_size(struct ir3_ubo_analysis_state *state,
- unsigned *packets, unsigned *size)
+ir3_user_consts_size(struct ir3_ubo_analysis_state *state, unsigned *packets,
+ unsigned *size)
{
- *packets = *size = 0;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
- if (state->range[i].start < state->range[i].end) {
- *size += state->range[i].end - state->range[i].start;
- (*packets)++;
- }
- }
+ *packets = *size = 0;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
+ if (state->range[i].start < state->range[i].end) {
+ *size += state->range[i].end - state->range[i].start;
+ (*packets)++;
+ }
+ }
}
/**
*/
static inline void
ir3_emit_constant_data(struct fd_screen *screen,
- const struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
+ const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring)
{
- const struct ir3_const_state *const_state = ir3_const_state(v);
- const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
-
- for (unsigned i = 0; i < state->num_enabled; i++) {
- unsigned ubo = state->range[i].ubo.block;
- if (ubo != const_state->constant_data_ubo)
- continue;
-
- uint32_t size = state->range[i].end - state->range[i].start;
-
- /* Pre-a6xx, we might have ranges enabled in the shader that aren't
- * used in the binning variant.
- */
- if (16 * v->constlen <= state->range[i].offset)
- continue;
-
- /* and even if the start of the const buffer is before
- * first_immediate, the end may not be:
- */
- size = MIN2(size, (16 * v->constlen) - state->range[i].offset);
-
- if (size == 0)
- continue;
-
- emit_const_bo(ring, v, state->range[i].offset / 4,
- v->info.constant_data_offset + state->range[i].start,
- size / 4, v->bo);
- }
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
+
+ for (unsigned i = 0; i < state->num_enabled; i++) {
+ unsigned ubo = state->range[i].ubo.block;
+ if (ubo != const_state->constant_data_ubo)
+ continue;
+
+ uint32_t size = state->range[i].end - state->range[i].start;
+
+ /* Pre-a6xx, we might have ranges enabled in the shader that aren't
+ * used in the binning variant.
+ */
+ if (16 * v->constlen <= state->range[i].offset)
+ continue;
+
+ /* and even if the start of the const buffer is before
+ * first_immediate, the end may not be:
+ */
+ size = MIN2(size, (16 * v->constlen) - state->range[i].offset);
+
+ if (size == 0)
+ continue;
+
+ emit_const_bo(ring, v, state->range[i].offset / 4,
+ v->info.constant_data_offset + state->range[i].start,
+ size / 4, v->bo);
+ }
}
/**
* shader).
*/
static inline void
-ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *v,
- struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
+ir3_emit_user_consts(struct fd_screen *screen,
+ const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring,
+ struct fd_constbuf_stateobj *constbuf)
{
- const struct ir3_const_state *const_state = ir3_const_state(v);
- const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
-
- for (unsigned i = 0; i < state->num_enabled; i++) {
- assert(!state->range[i].ubo.bindless);
- unsigned ubo = state->range[i].ubo.block;
- if (!(constbuf->enabled_mask & (1 << ubo)) ||
- ubo == const_state->constant_data_ubo) {
- continue;
- }
- struct pipe_constant_buffer *cb = &constbuf->cb[ubo];
-
- uint32_t size = state->range[i].end - state->range[i].start;
- uint32_t offset = cb->buffer_offset + state->range[i].start;
-
- /* Pre-a6xx, we might have ranges enabled in the shader that aren't
- * used in the binning variant.
- */
- if (16 * v->constlen <= state->range[i].offset)
- continue;
-
- /* and even if the start of the const buffer is before
- * first_immediate, the end may not be:
- */
- size = MIN2(size, (16 * v->constlen) - state->range[i].offset);
-
- if (size == 0)
- continue;
-
- /* things should be aligned to vec4: */
- debug_assert((state->range[i].offset % 16) == 0);
- debug_assert((size % 16) == 0);
- debug_assert((offset % 16) == 0);
-
- if (cb->user_buffer) {
- emit_const_user(ring, v, state->range[i].offset / 4,
- size / 4, cb->user_buffer + state->range[i].start);
- } else {
- emit_const_prsc(ring, v, state->range[i].offset / 4,
- offset, size / 4, cb->buffer);
- }
- }
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
+
+ for (unsigned i = 0; i < state->num_enabled; i++) {
+ assert(!state->range[i].ubo.bindless);
+ unsigned ubo = state->range[i].ubo.block;
+ if (!(constbuf->enabled_mask & (1 << ubo)) ||
+ ubo == const_state->constant_data_ubo) {
+ continue;
+ }
+ struct pipe_constant_buffer *cb = &constbuf->cb[ubo];
+
+ uint32_t size = state->range[i].end - state->range[i].start;
+ uint32_t offset = cb->buffer_offset + state->range[i].start;
+
+ /* Pre-a6xx, we might have ranges enabled in the shader that aren't
+ * used in the binning variant.
+ */
+ if (16 * v->constlen <= state->range[i].offset)
+ continue;
+
+ /* and even if the start of the const buffer is before
+ * first_immediate, the end may not be:
+ */
+ size = MIN2(size, (16 * v->constlen) - state->range[i].offset);
+
+ if (size == 0)
+ continue;
+
+ /* things should be aligned to vec4: */
+ debug_assert((state->range[i].offset % 16) == 0);
+ debug_assert((size % 16) == 0);
+ debug_assert((offset % 16) == 0);
+
+ if (cb->user_buffer) {
+ emit_const_user(ring, v, state->range[i].offset / 4, size / 4,
+ cb->user_buffer + state->range[i].start);
+ } else {
+ emit_const_prsc(ring, v, state->range[i].offset / 4, offset, size / 4,
+ cb->buffer);
+ }
+ }
}
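A worked example of the unit conversions above (illustrative numbers): v->constlen counts vec4s, so 16 * v->constlen is a byte limit, while the emit helpers take dword offsets, hence the /4:

   /* constlen = 8 vec4s -> 16*8 = 128 bytes of const space;
    * a range at byte offset 64 spanning 32 bytes is emitted at
    * dword regid 64/4 == 16, with size 32/4 == 8 dwords
    */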
static inline void
ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
- struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
+ struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
- const struct ir3_const_state *const_state = ir3_const_state(v);
- uint32_t offset = const_state->offsets.ubo;
-
- /* a6xx+ uses UBO state and ldc instead of pointers emitted in
- * const state and ldg:
- */
- if (ctx->screen->gpu_id >= 600)
- return;
-
- if (v->constlen > offset) {
- uint32_t params = const_state->num_ubos;
- uint32_t offsets[params];
- struct fd_bo *bos[params];
-
- for (uint32_t i = 0; i < params; i++) {
- if (i == const_state->constant_data_ubo) {
- bos[i] = v->bo;
- offsets[i] = v->info.constant_data_offset;
- continue;
- }
-
- struct pipe_constant_buffer *cb = &constbuf->cb[i];
-
- /* If we have user pointers (constbuf 0, aka GL uniforms), upload
- * them to a buffer now, and save it in the constbuf so that we
- * don't have to reupload until they get changed.
- */
- if (cb->user_buffer) {
- struct pipe_context *pctx = &ctx->base;
- u_upload_data(pctx->stream_uploader, 0,
- cb->buffer_size,
- 64,
- cb->user_buffer,
- &cb->buffer_offset, &cb->buffer);
- cb->user_buffer = NULL;
- }
-
- if ((constbuf->enabled_mask & (1 << i)) && cb->buffer) {
- offsets[i] = cb->buffer_offset;
- bos[i] = fd_resource(cb->buffer)->bo;
- } else {
- offsets[i] = 0;
- bos[i] = NULL;
- }
- }
-
- assert(offset * 4 + params <= v->constlen * 4);
-
- emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
- }
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ uint32_t offset = const_state->offsets.ubo;
+
+ /* a6xx+ uses UBO state and ldc instead of pointers emitted in
+ * const state and ldg:
+ */
+ if (ctx->screen->gpu_id >= 600)
+ return;
+
+ if (v->constlen > offset) {
+ uint32_t params = const_state->num_ubos;
+ uint32_t offsets[params];
+ struct fd_bo *bos[params];
+
+ for (uint32_t i = 0; i < params; i++) {
+ if (i == const_state->constant_data_ubo) {
+ bos[i] = v->bo;
+ offsets[i] = v->info.constant_data_offset;
+ continue;
+ }
+
+ struct pipe_constant_buffer *cb = &constbuf->cb[i];
+
+ /* If we have user pointers (constbuf 0, aka GL uniforms), upload
+ * them to a buffer now, and save it in the constbuf so that we
+ * don't have to reupload until they get changed.
+ */
+ if (cb->user_buffer) {
+ struct pipe_context *pctx = &ctx->base;
+ u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64,
+ cb->user_buffer, &cb->buffer_offset, &cb->buffer);
+ cb->user_buffer = NULL;
+ }
+
+ if ((constbuf->enabled_mask & (1 << i)) && cb->buffer) {
+ offsets[i] = cb->buffer_offset;
+ bos[i] = fd_resource(cb->buffer)->bo;
+ } else {
+ offsets[i] = 0;
+ bos[i] = NULL;
+ }
+ }
+
+ assert(offset * 4 + params <= v->constlen * 4);
+
+ emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
+ }
}
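
The user-buffer branch in ir3_emit_ubos implements an upload-once cache: a CPU-pointer constbuf is copied into a GPU buffer the first time it is emitted, and the cached buffer is reused until the state changes. A minimal model of that pattern — stand-in types, and malloc standing in for u_upload_data(), not the gallium API:

#include <stdlib.h>
#include <string.h>

struct cb {
   const void *user_buffer; /* CPU pointer, or NULL once uploaded */
   void *buffer;            /* stands in for the pipe_resource */
   unsigned buffer_offset;
};

/* stand-in for u_upload_data(): copy into freshly allocated storage */
static void upload(struct cb *cb, size_t size)
{
   cb->buffer = malloc(size);
   memcpy(cb->buffer, cb->user_buffer, size);
   cb->buffer_offset = 0;
}

static void ensure_uploaded(struct cb *cb, size_t size)
{
   if (!cb->user_buffer)
      return;              /* already GPU-backed, reuse it */
   upload(cb, size);
   cb->user_buffer = NULL; /* cached until the constbuf changes */
}

int main(void)
{
   char data[16] = "uniforms";
   struct cb cb = { .user_buffer = data };
   ensure_uploaded(&cb, sizeof(data)); /* uploads */
   ensure_uploaded(&cb, sizeof(data)); /* no-op: already cached */
   free(cb.buffer);
   return 0;
}
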
static inline void
-ir3_emit_ssbo_sizes(struct fd_screen *screen, const struct ir3_shader_variant *v,
- struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb)
+ir3_emit_ssbo_sizes(struct fd_screen *screen,
+ const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring,
+ struct fd_shaderbuf_stateobj *sb)
{
- const struct ir3_const_state *const_state = ir3_const_state(v);
- uint32_t offset = const_state->offsets.ssbo_sizes;
- if (v->constlen > offset) {
- uint32_t sizes[align(const_state->ssbo_size.count, 4)];
- unsigned mask = const_state->ssbo_size.mask;
-
- while (mask) {
- unsigned index = u_bit_scan(&mask);
- unsigned off = const_state->ssbo_size.off[index];
- sizes[off] = sb->sb[index].buffer_size;
- }
-
- emit_const_user(ring, v, offset * 4, ARRAY_SIZE(sizes), sizes);
- }
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ uint32_t offset = const_state->offsets.ssbo_sizes;
+ if (v->constlen > offset) {
+ uint32_t sizes[align(const_state->ssbo_size.count, 4)];
+ unsigned mask = const_state->ssbo_size.mask;
+
+ while (mask) {
+ unsigned index = u_bit_scan(&mask);
+ unsigned off = const_state->ssbo_size.off[index];
+ sizes[off] = sb->sb[index].buffer_size;
+ }
+
+ emit_const_user(ring, v, offset * 4, ARRAY_SIZE(sizes), sizes);
+ }
}
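
ssbo_size.mask/off in ir3_emit_ssbo_sizes is a sparse-to-packed remap: only the SSBOs the shader actually sizes get a slot, and off[] says which packed dword each one lands in. A self-contained model with illustrative data; __builtin_ctz plays the role of u_bit_scan():

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   /* shader references SSBOs 1 and 3; pack their sizes into slots 0,1 */
   unsigned mask = (1u << 1) | (1u << 3);
   const unsigned off[4] = { 0, 0, 0, 1 };
   const uint32_t buffer_size[4] = { 0, 256, 0, 512 };
   uint32_t sizes[4] = { 0 };

   while (mask) {
      unsigned index = __builtin_ctz(mask); /* lowest set bit, like u_bit_scan */
      mask &= mask - 1;                     /* clear it */
      sizes[off[index]] = buffer_size[index];
   }
   printf("%u %u\n", sizes[0], sizes[1]);   /* 256 512 */
   return 0;
}
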
static inline void
-ir3_emit_image_dims(struct fd_screen *screen, const struct ir3_shader_variant *v,
- struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si)
+ir3_emit_image_dims(struct fd_screen *screen,
+ const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring,
+ struct fd_shaderimg_stateobj *si)
{
- const struct ir3_const_state *const_state = ir3_const_state(v);
- uint32_t offset = const_state->offsets.image_dims;
- if (v->constlen > offset) {
- uint32_t dims[align(const_state->image_dims.count, 4)];
- unsigned mask = const_state->image_dims.mask;
-
- while (mask) {
- struct pipe_image_view *img;
- struct fd_resource *rsc;
- unsigned index = u_bit_scan(&mask);
- unsigned off = const_state->image_dims.off[index];
-
- img = &si->si[index];
- rsc = fd_resource(img->resource);
-
- dims[off + 0] = util_format_get_blocksize(img->format);
- if (img->resource->target != PIPE_BUFFER) {
- struct fdl_slice *slice =
- fd_resource_slice(rsc, img->u.tex.level);
- /* note for 2d/cube/etc images, even if re-interpreted
- * as a different color format, the pixel size should
- * be the same, so use original dimensions for y and z
- * stride:
- */
- dims[off + 1] = fd_resource_pitch(rsc, img->u.tex.level);
- /* see corresponding logic in fd_resource_offset(): */
- if (rsc->layout.layer_first) {
- dims[off + 2] = rsc->layout.layer_size;
- } else {
- dims[off + 2] = slice->size0;
- }
- } else {
- /* For buffer-backed images, the log2 of the format's
- * bytes-per-pixel is placed on the 2nd slot. This is useful
- * when emitting image_size instructions, for which we need
- * to divide by bpp for image buffers. Since the bpp
- * can only be power-of-two, the division is implemented
- * as a SHR, and for that it is handy to have the log2 of
- * bpp as a constant. (log2 = first-set-bit - 1)
- */
- dims[off + 1] = ffs(dims[off + 0]) - 1;
- }
- }
- uint32_t size = MIN2(ARRAY_SIZE(dims), v->constlen * 4 - offset * 4);
-
- emit_const_user(ring, v, offset * 4, size, dims);
- }
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ uint32_t offset = const_state->offsets.image_dims;
+ if (v->constlen > offset) {
+ uint32_t dims[align(const_state->image_dims.count, 4)];
+ unsigned mask = const_state->image_dims.mask;
+
+ while (mask) {
+ struct pipe_image_view *img;
+ struct fd_resource *rsc;
+ unsigned index = u_bit_scan(&mask);
+ unsigned off = const_state->image_dims.off[index];
+
+ img = &si->si[index];
+ rsc = fd_resource(img->resource);
+
+ dims[off + 0] = util_format_get_blocksize(img->format);
+ if (img->resource->target != PIPE_BUFFER) {
+ struct fdl_slice *slice = fd_resource_slice(rsc, img->u.tex.level);
+ /* note for 2d/cube/etc images, even if re-interpreted
+ * as a different color format, the pixel size should
+ * be the same, so use original dimensions for y and z
+ * stride:
+ */
+ dims[off + 1] = fd_resource_pitch(rsc, img->u.tex.level);
+ /* see corresponding logic in fd_resource_offset(): */
+ if (rsc->layout.layer_first) {
+ dims[off + 2] = rsc->layout.layer_size;
+ } else {
+ dims[off + 2] = slice->size0;
+ }
+ } else {
+ /* For buffer-backed images, the log2 of the format's
+ * bytes-per-pixel is placed on the 2nd slot. This is useful
+ * when emitting image_size instructions, for which we need
+ * to divide by bpp for image buffers. Since the bpp
+ * can only be power-of-two, the division is implemented
+ * as a SHR, and for that it is handy to have the log2 of
+ * bpp as a constant. (log2 = first-set-bit - 1)
+ */
+ dims[off + 1] = ffs(dims[off + 0]) - 1;
+ }
+ }
+ uint32_t size = MIN2(ARRAY_SIZE(dims), v->constlen * 4 - offset * 4);
+
+ emit_const_user(ring, v, offset * 4, size, dims);
+ }
}
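
For buffer images the second slot stores log2(bpp), so image_size can turn a byte count into texels with a single right shift; because the blocksize is a power of two, log2 = ffs(bpp) - 1, exactly as the comment says. A quick check in plain C:

#include <stdio.h>
#include <strings.h> /* ffs() */

int main(void)
{
   unsigned bpp = 16;               /* e.g. a 16-byte/texel format */
   unsigned log2bpp = ffs(bpp) - 1; /* 16 == 1 << 4, so log2 = 4 */
   unsigned bytes = 4096;

   printf("texels = %u\n", bytes >> log2bpp); /* 4096 >> 4 = 256 */
   return 0;
}
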
static inline void
-ir3_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v,
- struct fd_ringbuffer *ring)
+ir3_emit_immediates(struct fd_screen *screen,
+ const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring)
{
- const struct ir3_const_state *const_state = ir3_const_state(v);
- uint32_t base = const_state->offsets.immediate;
- int size = DIV_ROUND_UP(const_state->immediates_count, 4);
-
- /* truncate size to avoid writing constants that shader
- * does not use:
- */
- size = MIN2(size + base, v->constlen) - base;
-
- /* convert out of vec4: */
- base *= 4;
- size *= 4;
-
- if (size > 0)
- emit_const_user(ring, v, base, size, const_state->immediates);
-
- /* NIR constant data has the same lifetime as immediates, so upload it
- * now, too.
- */
- ir3_emit_constant_data(screen, v, ring);
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ uint32_t base = const_state->offsets.immediate;
+ int size = DIV_ROUND_UP(const_state->immediates_count, 4);
+
+ /* truncate size to avoid writing constants that shader
+ * does not use:
+ */
+ size = MIN2(size + base, v->constlen) - base;
+
+ /* convert out of vec4: */
+ base *= 4;
+ size *= 4;
+
+ if (size > 0)
+ emit_const_user(ring, v, base, size, const_state->immediates);
+
+ /* NIR constant data has the same lifetime as immediates, so upload it
+ * now, too.
+ */
+ ir3_emit_constant_data(screen, v, ring);
}
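
The truncation in ir3_emit_immediates (and the identical one in ir3_emit_link_map below) works in vec4 units first and converts to dwords last. One worked instance, with made-up numbers:

/* base = 10 vec4s, immediates_count = 40 dwords -> size = 10 vec4s
 * constlen = 16 vec4s
 * size = MIN2(10 + 10, 16) - 10 = 6 vec4s
 * after *4: emit 6*4 = 24 dwords starting at dword 10*4 = 40
 */
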
static inline void
ir3_emit_link_map(struct fd_screen *screen,
- const struct ir3_shader_variant *producer,
- const struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
+ const struct ir3_shader_variant *producer,
+ const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring)
{
- const struct ir3_const_state *const_state = ir3_const_state(v);
- uint32_t base = const_state->offsets.primitive_map;
- int size = DIV_ROUND_UP(v->input_size, 4);
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ uint32_t base = const_state->offsets.primitive_map;
+ int size = DIV_ROUND_UP(v->input_size, 4);
- /* truncate size to avoid writing constants that shader
- * does not use:
- */
- size = MIN2(size + base, v->constlen) - base;
+ /* truncate size to avoid writing constants that shader
+ * does not use:
+ */
+ size = MIN2(size + base, v->constlen) - base;
- /* convert out of vec4: */
- base *= 4;
- size *= 4;
+ /* convert out of vec4: */
+ base *= 4;
+ size *= 4;
- if (size > 0)
- emit_const_user(ring, v, base, size, producer->output_loc);
+ if (size > 0)
+ emit_const_user(ring, v, base, size, producer->output_loc);
}
/* emit stream-out buffers: */
static inline void
emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
- struct fd_ringbuffer *ring)
+ struct fd_ringbuffer *ring)
{
- /* streamout addresses after driver-params: */
- const struct ir3_const_state *const_state = ir3_const_state(v);
- uint32_t offset = const_state->offsets.tfbo;
- if (v->constlen > offset) {
- struct fd_streamout_stateobj *so = &ctx->streamout;
- struct ir3_stream_output_info *info = &v->shader->stream_output;
- uint32_t params = 4;
- uint32_t offsets[params];
- struct fd_bo *bos[params];
-
- for (uint32_t i = 0; i < params; i++) {
- struct pipe_stream_output_target *target = so->targets[i];
-
- if (target) {
- offsets[i] = (so->offsets[i] * info->stride[i] * 4) +
- target->buffer_offset;
- bos[i] = fd_resource(target->buffer)->bo;
- } else {
- offsets[i] = 0;
- bos[i] = NULL;
- }
- }
-
- assert(offset * 4 + params <= v->constlen * 4);
-
- emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
- }
+ /* streamout addresses after driver-params: */
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ uint32_t offset = const_state->offsets.tfbo;
+ if (v->constlen > offset) {
+ struct fd_streamout_stateobj *so = &ctx->streamout;
+ struct ir3_stream_output_info *info = &v->shader->stream_output;
+ uint32_t params = 4;
+ uint32_t offsets[params];
+ struct fd_bo *bos[params];
+
+ for (uint32_t i = 0; i < params; i++) {
+ struct pipe_stream_output_target *target = so->targets[i];
+
+ if (target) {
+ offsets[i] =
+ (so->offsets[i] * info->stride[i] * 4) + target->buffer_offset;
+ bos[i] = fd_resource(target->buffer)->bo;
+ } else {
+ offsets[i] = 0;
+ bos[i] = NULL;
+ }
+ }
+
+ assert(offset * 4 + params <= v->constlen * 4);
+
+ emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
+ }
}
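
The offsets[i] computation in emit_tfbos converts a vertex count into a byte offset: info->stride[] is in dwords per vertex, so the resume point is verts * stride * 4 plus the target's own byte offset. With illustrative numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t verts_written = 100; /* so->offsets[i], vertices */
   uint32_t stride = 4;          /* info->stride[i], dwords per vertex */
   uint32_t buffer_offset = 64;  /* target->buffer_offset, bytes */

   /* 100 * 4 * 4 + 64 = 1664 bytes into the buffer */
   printf("%u\n", verts_written * stride * 4 + buffer_offset);
   return 0;
}
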
static inline void
-emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
- struct fd_context *ctx, enum pipe_shader_type t)
- assert_dt
+emit_common_consts(const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring, struct fd_context *ctx,
+ enum pipe_shader_type t) assert_dt
{
- enum fd_dirty_shader_state dirty = ctx->dirty_shader[t];
-
- /* When we use CP_SET_DRAW_STATE objects to emit constant state,
- * if we emit any of it we need to emit all. This is because
- * we are using the same state-group-id each time for uniform
- * state, and if previous update is never evaluated (due to no
- * visible primitives in the current tile) then the new stateobj
- * completely replaces the old one.
- *
- * Possibly if we split up different parts of the const state to
- * different state-objects we could avoid this.
- */
- if (dirty && is_stateobj(ring))
- dirty = ~0;
-
- if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
- struct fd_constbuf_stateobj *constbuf;
- bool shader_dirty;
-
- constbuf = &ctx->constbuf[t];
- shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);
-
- ring_wfi(ctx->batch, ring);
-
- ir3_emit_user_consts(ctx->screen, v, ring, constbuf);
- ir3_emit_ubos(ctx, v, ring, constbuf);
- if (shader_dirty)
- ir3_emit_immediates(ctx->screen, v, ring);
- }
-
- if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) {
- struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t];
- ring_wfi(ctx->batch, ring);
- ir3_emit_ssbo_sizes(ctx->screen, v, ring, sb);
- }
-
- if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) {
- struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t];
- ring_wfi(ctx->batch, ring);
- ir3_emit_image_dims(ctx->screen, v, ring, si);
- }
+ enum fd_dirty_shader_state dirty = ctx->dirty_shader[t];
+
+ /* When we use CP_SET_DRAW_STATE objects to emit constant state,
+ * if we emit any of it we need to emit all. This is because
+ * we are using the same state-group-id each time for uniform
+ * state, and if previous update is never evaluated (due to no
+ * visible primitives in the current tile) then the new stateobj
+ * completely replaces the old one.
+ *
+ * Possibly if we split up different parts of the const state to
+ * different state-objects we could avoid this.
+ */
+ if (dirty && is_stateobj(ring))
+ dirty = ~0;
+
+ if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
+ struct fd_constbuf_stateobj *constbuf;
+ bool shader_dirty;
+
+ constbuf = &ctx->constbuf[t];
+ shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);
+
+ ring_wfi(ctx->batch, ring);
+
+ ir3_emit_user_consts(ctx->screen, v, ring, constbuf);
+ ir3_emit_ubos(ctx, v, ring, constbuf);
+ if (shader_dirty)
+ ir3_emit_immediates(ctx->screen, v, ring);
+ }
+
+ if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) {
+ struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t];
+ ring_wfi(ctx->batch, ring);
+ ir3_emit_ssbo_sizes(ctx->screen, v, ring, sb);
+ }
+
+ if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) {
+ struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t];
+ ring_wfi(ctx->batch, ring);
+ ir3_emit_image_dims(ctx->screen, v, ring, si);
+ }
}
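
The dirty = ~0 widening in emit_common_consts is the key subtlety: a CP_SET_DRAW_STATE group replaces its predecessor wholesale, so emitting only the changed pieces would silently drop the rest. A toy model of that decision (illustrative flags, not the driver's):

#include <stdio.h>

enum { DIRTY_CONST = 1 << 0, DIRTY_SSBO = 1 << 1, DIRTY_IMAGE = 1 << 2 };

int main(void)
{
   unsigned dirty = DIRTY_SSBO; /* only SSBOs changed this draw */
   int is_stateobj = 1;         /* emitting into a CP_SET_DRAW_STATE group */

   /* the new group replaces the old one entirely, so any dirtiness
    * means everything must be re-emitted */
   if (dirty && is_stateobj)
      dirty = ~0u;

   printf("emit consts too: %d\n", !!(dirty & DIRTY_CONST)); /* 1 */
   return 0;
}
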
static inline void
ir3_emit_vs_driver_params(const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_context *ctx,
const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count *draw)
- assert_dt
+ const struct pipe_draw_start_count *draw) assert_dt
{
- assert(v->need_driver_params);
-
- const struct ir3_const_state *const_state = ir3_const_state(v);
- uint32_t offset = const_state->offsets.driver_param;
- uint32_t vertex_params[IR3_DP_VS_COUNT] = {
- [IR3_DP_DRAWID] = 0, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
- [IR3_DP_VTXID_BASE] = info->index_size ?
- info->index_bias : draw->start,
- [IR3_DP_INSTID_BASE] = info->start_instance,
- [IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx,
- };
- if (v->key.ucp_enables) {
- struct pipe_clip_state *ucp = &ctx->ucp;
- unsigned pos = IR3_DP_UCP0_X;
- for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
- for (unsigned j = 0; j < 4; j++) {
- vertex_params[pos] = fui(ucp->ucp[i][j]);
- pos++;
- }
- }
- }
-
- /* Only emit as many params as needed, i.e. up to the highest enabled UCP
- * plane. However a binning pass may drop even some of these, so limit to
- * program max.
- */
- const uint32_t vertex_params_size = MIN2(
- const_state->num_driver_params,
- (v->constlen - offset) * 4);
- assert(vertex_params_size <= IR3_DP_VS_COUNT);
-
- bool needs_vtxid_base =
- ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) != regid(63, 0);
-
- /* for indirect draw, we need to copy VTXID_BASE from
- * indirect-draw parameters buffer.. which is annoying
- * and means we can't easily emit these consts in cmd
- * stream so need to copy them to bo.
- */
- if (indirect && needs_vtxid_base) {
- struct pipe_resource *vertex_params_rsc =
- pipe_buffer_create(&ctx->screen->base,
- PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM,
- vertex_params_size * 4);
- unsigned src_off = indirect->offset;;
- void *ptr;
-
- ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo);
- memcpy(ptr, vertex_params, vertex_params_size * 4);
-
- if (info->index_size) {
- /* indexed draw, index_bias is 4th field: */
- src_off += 3 * 4;
- } else {
- /* non-indexed draw, start is 3rd field: */
- src_off += 2 * 4;
- }
-
- /* copy index_bias or start from draw params: */
- ctx->screen->mem_to_mem(ring, vertex_params_rsc, 0,
- indirect->buffer, src_off, 1);
-
- emit_const_prsc(ring, v, offset * 4, 0,
- vertex_params_size, vertex_params_rsc);
-
- pipe_resource_reference(&vertex_params_rsc, NULL);
- } else {
- emit_const_user(ring, v, offset * 4,
- vertex_params_size, vertex_params);
- }
-
- /* if needed, emit stream-out buffer addresses: */
- if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
- emit_tfbos(ctx, v, ring);
- }
+ assert(v->need_driver_params);
+
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ uint32_t offset = const_state->offsets.driver_param;
+ uint32_t vertex_params[IR3_DP_VS_COUNT] = {
+ [IR3_DP_DRAWID] = 0, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
+ [IR3_DP_VTXID_BASE] = info->index_size ? info->index_bias : draw->start,
+ [IR3_DP_INSTID_BASE] = info->start_instance,
+ [IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx,
+ };
+ if (v->key.ucp_enables) {
+ struct pipe_clip_state *ucp = &ctx->ucp;
+ unsigned pos = IR3_DP_UCP0_X;
+ for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
+ for (unsigned j = 0; j < 4; j++) {
+ vertex_params[pos] = fui(ucp->ucp[i][j]);
+ pos++;
+ }
+ }
+ }
+
+ /* Only emit as many params as needed, i.e. up to the highest enabled UCP
+ * plane. However a binning pass may drop even some of these, so limit to
+ * program max.
+ */
+ const uint32_t vertex_params_size =
+ MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
+ assert(vertex_params_size <= IR3_DP_VS_COUNT);
+
+ bool needs_vtxid_base =
+ ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) !=
+ regid(63, 0);
+
+ /* for indirect draw, we need to copy VTXID_BASE from
+ * indirect-draw parameters buffer.. which is annoying
+ * and means we can't easily emit these consts in cmd
+ * stream so need to copy them to bo.
+ */
+ if (indirect && needs_vtxid_base) {
+ struct pipe_resource *vertex_params_rsc =
+ pipe_buffer_create(&ctx->screen->base, PIPE_BIND_CONSTANT_BUFFER,
+ PIPE_USAGE_STREAM, vertex_params_size * 4);
+ unsigned src_off = indirect->offset;
+ void *ptr;
+
+ ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo);
+ memcpy(ptr, vertex_params, vertex_params_size * 4);
+
+ if (info->index_size) {
+ /* indexed draw, index_bias is 4th field: */
+ src_off += 3 * 4;
+ } else {
+ /* non-indexed draw, start is 3rd field: */
+ src_off += 2 * 4;
+ }
+
+ /* copy index_bias or start from draw params: */
+ ctx->screen->mem_to_mem(ring, vertex_params_rsc, 0, indirect->buffer,
+ src_off, 1);
+
+ emit_const_prsc(ring, v, offset * 4, 0, vertex_params_size,
+ vertex_params_rsc);
+
+ pipe_resource_reference(&vertex_params_rsc, NULL);
+ } else {
+ emit_const_user(ring, v, offset * 4, vertex_params_size, vertex_params);
+ }
+
+ /* if needed, emit stream-out buffer addresses: */
+ if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
+ emit_tfbos(ctx, v, ring);
+ }
}
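
The 2*4 and 3*4 offsets in the indirect path above follow the standard GL-style indirect packet layouts, where index_bias (baseVertex) is the 4th dword of an indexed packet and start is the 3rd dword of a non-indexed one. Sketched with stand-in structs, not the driver's own definitions:

#include <stddef.h>
#include <stdio.h>

struct draw_arrays_indirect {
   unsigned count, instance_count, start, start_instance;
};
struct draw_elements_indirect {
   unsigned count, instance_count, first_index, base_vertex, start_instance;
};

int main(void)
{
   /* non-indexed: 'start' is the 3rd dword -> src_off += 2 * 4 */
   printf("%zu\n", offsetof(struct draw_arrays_indirect, start)); /* 8 */
   /* indexed: 'base_vertex' is the 4th dword -> src_off += 3 * 4 */
   printf("%zu\n", offsetof(struct draw_elements_indirect, base_vertex)); /* 12 */
   return 0;
}
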
static inline void
-ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
- struct fd_context *ctx, const struct pipe_draw_info *info,
+ir3_emit_vs_consts(const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring, struct fd_context *ctx,
+ const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count *draw)
- assert_dt
+ const struct pipe_draw_start_count *draw) assert_dt
{
- debug_assert(v->type == MESA_SHADER_VERTEX);
+ debug_assert(v->type == MESA_SHADER_VERTEX);
- emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX);
+ emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX);
- /* emit driver params every time: */
- if (info && v->need_driver_params) {
- ring_wfi(ctx->batch, ring);
- ir3_emit_vs_driver_params(v, ring, ctx, info, indirect, draw);
- }
+ /* emit driver params every time: */
+ if (info && v->need_driver_params) {
+ ring_wfi(ctx->batch, ring);
+ ir3_emit_vs_driver_params(v, ring, ctx, info, indirect, draw);
+ }
}
static inline void
-ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
- struct fd_context *ctx)
- assert_dt
+ir3_emit_fs_consts(const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring, struct fd_context *ctx) assert_dt
{
- debug_assert(v->type == MESA_SHADER_FRAGMENT);
+ debug_assert(v->type == MESA_SHADER_FRAGMENT);
- emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT);
+ emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT);
}
/* emit compute-shader consts: */
static inline void
-ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
- struct fd_context *ctx, const struct pipe_grid_info *info)
- assert_dt
+ir3_emit_cs_consts(const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring, struct fd_context *ctx,
+ const struct pipe_grid_info *info) assert_dt
{
- debug_assert(gl_shader_stage_is_compute(v->type));
-
- emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
-
- /* emit compute-shader driver-params: */
- const struct ir3_const_state *const_state = ir3_const_state(v);
- uint32_t offset = const_state->offsets.driver_param;
- if (v->constlen > offset) {
- ring_wfi(ctx->batch, ring);
-
- if (info->indirect) {
- struct pipe_resource *indirect = NULL;
- unsigned indirect_offset;
-
- /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs
- * to be aligned more strongly than 4 bytes. So in this case
- * we need a temporary buffer to copy NumWorkGroups.xyz to.
- *
- * TODO if previous compute job is writing to info->indirect,
- * we might need a WFI.. but since we currently flush for each
- * compute job, we are probably ok for now.
- */
- if (info->indirect_offset & 0xf) {
- indirect = pipe_buffer_create(&ctx->screen->base,
- PIPE_BIND_COMMAND_ARGS_BUFFER, PIPE_USAGE_STREAM,
- 0x1000);
- indirect_offset = 0;
-
- ctx->screen->mem_to_mem(ring, indirect, 0, info->indirect,
- info->indirect_offset, 3);
- } else {
- pipe_resource_reference(&indirect, info->indirect);
- indirect_offset = info->indirect_offset;
- }
-
- emit_const_prsc(ring, v, offset * 4, indirect_offset, 16, indirect);
-
- pipe_resource_reference(&indirect, NULL);
- } else {
- uint32_t compute_params[IR3_DP_CS_COUNT] = {
- [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
- [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
- [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
- [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0],
- [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1],
- [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2],
- };
- uint32_t size = MIN2(const_state->num_driver_params,
- v->constlen * 4 - offset * 4);
-
- emit_const_user(ring, v, offset * 4, size, compute_params);
- }
- }
+ debug_assert(gl_shader_stage_is_compute(v->type));
+
+ emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
+
+ /* emit compute-shader driver-params: */
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ uint32_t offset = const_state->offsets.driver_param;
+ if (v->constlen > offset) {
+ ring_wfi(ctx->batch, ring);
+
+ if (info->indirect) {
+ struct pipe_resource *indirect = NULL;
+ unsigned indirect_offset;
+
+ /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs
+ * to be aligned more strongly than 4 bytes. So in this case
+ * we need a temporary buffer to copy NumWorkGroups.xyz to.
+ *
+ * TODO if previous compute job is writing to info->indirect,
+ * we might need a WFI.. but since we currently flush for each
+ * compute job, we are probably ok for now.
+ */
+ if (info->indirect_offset & 0xf) {
+ indirect = pipe_buffer_create(&ctx->screen->base,
+ PIPE_BIND_COMMAND_ARGS_BUFFER,
+ PIPE_USAGE_STREAM, 0x1000);
+ indirect_offset = 0;
+
+ ctx->screen->mem_to_mem(ring, indirect, 0, info->indirect,
+ info->indirect_offset, 3);
+ } else {
+ pipe_resource_reference(&indirect, info->indirect);
+ indirect_offset = info->indirect_offset;
+ }
+
+ emit_const_prsc(ring, v, offset * 4, indirect_offset, 16, indirect);
+
+ pipe_resource_reference(&indirect, NULL);
+ } else {
+ uint32_t compute_params[IR3_DP_CS_COUNT] = {
+ [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
+ [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
+ [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
+ [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0],
+ [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1],
+ [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2],
+ };
+ uint32_t size =
+ MIN2(const_state->num_driver_params, v->constlen * 4 - offset * 4);
+
+ emit_const_user(ring, v, offset * 4, size, compute_params);
+ }
+ }
}
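
The info->indirect_offset & 0xf test in ir3_emit_cs_consts checks 16-byte alignment: CP_LOAD_STATE source addresses need stronger-than-dword alignment, so unaligned NumWorkGroups get bounced through a temporary buffer first. The predicate in isolation:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   const uint32_t offs[] = { 0, 4, 16, 36, 48 };
   for (unsigned i = 0; i < 5; i++) {
      /* (off & 0xf) != 0 -> not 16-byte aligned: copy to a temp buffer */
      printf("offset %2u: %s\n", offs[i],
             (offs[i] & 0xf) ? "bounce through temp" : "use in place");
   }
   return 0;
}
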
* Rob Clark <robclark@freedesktop.org>
*/
-#include "pipe/p_state.h"
#include "pipe/p_screen.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "util/format/u_format.h"
+#include "pipe/p_state.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
+#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
#include "nir/tgsi_to_nir.h"
#include "freedreno_util.h"
#include "ir3/ir3_cache.h"
-#include "ir3/ir3_shader.h"
-#include "ir3/ir3_gallium.h"
#include "ir3/ir3_compiler.h"
+#include "ir3/ir3_gallium.h"
#include "ir3/ir3_nir.h"
+#include "ir3/ir3_shader.h"
/**
* The hardware cso for shader state
* plumb in async compile.
*/
struct ir3_shader_state {
- struct ir3_shader *shader;
+ struct ir3_shader *shader;
- /* Fence signalled when async compile is completed: */
- struct util_queue_fence ready;
+ /* Fence signalled when async compile is completed: */
+ struct util_queue_fence ready;
};
/**
static bool
initial_variants_synchronous(struct fd_context *ctx)
{
- return unlikely(ctx->debug.debug_message) ||
- FD_DBG(SHADERDB) || FD_DBG(SERIALC);
+ return unlikely(ctx->debug.debug_message) || FD_DBG(SHADERDB) ||
+ FD_DBG(SERIALC);
}
static void
-dump_shader_info(struct ir3_shader_variant *v, struct pipe_debug_callback *debug)
+dump_shader_info(struct ir3_shader_variant *v,
+ struct pipe_debug_callback *debug)
{
- if (!FD_DBG(SHADERDB))
- return;
-
- pipe_debug_message(debug, SHADER_INFO,
- "%s shader: %u inst, %u nops, %u non-nops, %u mov, %u cov, "
- "%u dwords, %u last-baryf, %u half, %u full, %u constlen, "
- "%u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7, "
- "%u sstall, %u (ss), %u (sy), %d waves, %d max_sun, %d loops\n",
- ir3_shader_stage(v),
- v->info.instrs_count,
- v->info.nops_count,
- v->info.instrs_count - v->info.nops_count,
- v->info.mov_count,
- v->info.cov_count,
- v->info.sizedwords,
- v->info.last_baryf,
- v->info.max_half_reg + 1,
- v->info.max_reg + 1,
- v->constlen,
- v->info.instrs_per_cat[0],
- v->info.instrs_per_cat[1],
- v->info.instrs_per_cat[2],
- v->info.instrs_per_cat[3],
- v->info.instrs_per_cat[4],
- v->info.instrs_per_cat[5],
- v->info.instrs_per_cat[6],
- v->info.instrs_per_cat[7],
- v->info.sstall,
- v->info.ss, v->info.sy,
- v->info.max_waves,
- v->max_sun, v->loops);
+ if (!FD_DBG(SHADERDB))
+ return;
+
+ pipe_debug_message(
+ debug, SHADER_INFO,
+ "%s shader: %u inst, %u nops, %u non-nops, %u mov, %u cov, "
+ "%u dwords, %u last-baryf, %u half, %u full, %u constlen, "
+ "%u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7, "
+ "%u sstall, %u (ss), %u (sy), %d waves, %d max_sun, %d loops\n",
+ ir3_shader_stage(v), v->info.instrs_count, v->info.nops_count,
+ v->info.instrs_count - v->info.nops_count, v->info.mov_count,
+ v->info.cov_count, v->info.sizedwords, v->info.last_baryf,
+ v->info.max_half_reg + 1, v->info.max_reg + 1, v->constlen,
+ v->info.instrs_per_cat[0], v->info.instrs_per_cat[1],
+ v->info.instrs_per_cat[2], v->info.instrs_per_cat[3],
+ v->info.instrs_per_cat[4], v->info.instrs_per_cat[5],
+ v->info.instrs_per_cat[6], v->info.instrs_per_cat[7], v->info.sstall,
+ v->info.ss, v->info.sy, v->info.max_waves, v->max_sun, v->loops);
}
static void
upload_shader_variant(struct ir3_shader_variant *v)
{
- struct shader_info *info = &v->shader->nir->info;
- struct ir3_compiler *compiler = v->shader->compiler;
+ struct shader_info *info = &v->shader->nir->info;
+ struct ir3_compiler *compiler = v->shader->compiler;
- assert(!v->bo);
+ assert(!v->bo);
- v->bo = fd_bo_new(compiler->dev, v->info.size,
- DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
- DRM_FREEDRENO_GEM_TYPE_KMEM,
- "%s:%s", ir3_shader_stage(v), info->name);
+ v->bo =
+ fd_bo_new(compiler->dev, v->info.size,
+ DRM_FREEDRENO_GEM_CACHE_WCOMBINE | DRM_FREEDRENO_GEM_TYPE_KMEM,
+ "%s:%s", ir3_shader_stage(v), info->name);
- /* Always include shaders in kernel crash dumps. */
- fd_bo_mark_for_dump(v->bo);
+ /* Always include shaders in kernel crash dumps. */
+ fd_bo_mark_for_dump(v->bo);
- memcpy(fd_bo_map(v->bo), v->bin, v->info.size);
+ memcpy(fd_bo_map(v->bo), v->bin, v->info.size);
}
struct ir3_shader_variant *
ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key,
- bool binning_pass, struct pipe_debug_callback *debug)
+ bool binning_pass, struct pipe_debug_callback *debug)
{
- struct ir3_shader_variant *v;
- bool created = false;
-
- /* Some shader key values may not be used by a given ir3_shader (for
- * example, fragment shader saturates in the vertex shader), so clean out
- * those flags to avoid recompiling.
- */
- ir3_key_clear_unused(&key, shader);
-
- v = ir3_shader_get_variant(shader, &key, binning_pass, false, &created);
-
- if (created) {
- if (shader->initial_variants_done) {
- pipe_debug_message(debug, SHADER_INFO,
- "%s shader: recompiling at draw time: global 0x%08x, vfsamples %x/%x, astc %x/%x\n",
- ir3_shader_stage(v),
- key.global,
- key.vsamples, key.fsamples,
- key.vastc_srgb, key.fastc_srgb);
-
- }
-
- dump_shader_info(v, debug);
- upload_shader_variant(v);
-
- if (v->binning) {
- upload_shader_variant(v->binning);
- dump_shader_info(v->binning, debug);
- }
- }
-
- return v;
+ struct ir3_shader_variant *v;
+ bool created = false;
+
+ /* Some shader key values may not be used by a given ir3_shader (for
+ * example, fragment shader saturates in the vertex shader), so clean out
+ * those flags to avoid recompiling.
+ */
+ ir3_key_clear_unused(&key, shader);
+
+ v = ir3_shader_get_variant(shader, &key, binning_pass, false, &created);
+
+ if (created) {
+ if (shader->initial_variants_done) {
+ pipe_debug_message(debug, SHADER_INFO,
+ "%s shader: recompiling at draw time: global "
+ "0x%08x, vfsamples %x/%x, astc %x/%x\n",
+ ir3_shader_stage(v), key.global, key.vsamples,
+ key.fsamples, key.vastc_srgb, key.fastc_srgb);
+ }
+
+ dump_shader_info(v, debug);
+ upload_shader_variant(v);
+
+ if (v->binning) {
+ upload_shader_variant(v->binning);
+ dump_shader_info(v->binning, debug);
+ }
+ }
+
+ return v;
}
static void
copy_stream_out(struct ir3_stream_output_info *i,
- const struct pipe_stream_output_info *p)
+ const struct pipe_stream_output_info *p)
{
- STATIC_ASSERT(ARRAY_SIZE(i->stride) == ARRAY_SIZE(p->stride));
- STATIC_ASSERT(ARRAY_SIZE(i->output) == ARRAY_SIZE(p->output));
-
- i->num_outputs = p->num_outputs;
- for (int n = 0; n < ARRAY_SIZE(i->stride); n++)
- i->stride[n] = p->stride[n];
-
- for (int n = 0; n < ARRAY_SIZE(i->output); n++) {
- i->output[n].register_index = p->output[n].register_index;
- i->output[n].start_component = p->output[n].start_component;
- i->output[n].num_components = p->output[n].num_components;
- i->output[n].output_buffer = p->output[n].output_buffer;
- i->output[n].dst_offset = p->output[n].dst_offset;
- i->output[n].stream = p->output[n].stream;
- }
+ STATIC_ASSERT(ARRAY_SIZE(i->stride) == ARRAY_SIZE(p->stride));
+ STATIC_ASSERT(ARRAY_SIZE(i->output) == ARRAY_SIZE(p->output));
+
+ i->num_outputs = p->num_outputs;
+ for (int n = 0; n < ARRAY_SIZE(i->stride); n++)
+ i->stride[n] = p->stride[n];
+
+ for (int n = 0; n < ARRAY_SIZE(i->output); n++) {
+ i->output[n].register_index = p->output[n].register_index;
+ i->output[n].start_component = p->output[n].start_component;
+ i->output[n].num_components = p->output[n].num_components;
+ i->output[n].output_buffer = p->output[n].output_buffer;
+ i->output[n].dst_offset = p->output[n].dst_offset;
+ i->output[n].stream = p->output[n].stream;
+ }
}
static void
create_initial_variants(struct ir3_shader_state *hwcso,
- struct pipe_debug_callback *debug)
+ struct pipe_debug_callback *debug)
{
- struct ir3_shader *shader = hwcso->shader;
- struct ir3_compiler *compiler = shader->compiler;
- nir_shader *nir = shader->nir;
-
- /* Compile standard variants immediately to try to avoid draw-time stalls
- * to run the compiler.
- */
- struct ir3_shader_key key = {
- .tessellation = IR3_TESS_NONE,
- .ucp_enables = MASK(nir->info.clip_distance_array_size),
- .msaa = true,
- };
-
- switch (nir->info.stage) {
- case MESA_SHADER_TESS_EVAL:
- key.tessellation = ir3_tess_mode(nir->info.tess.primitive_mode);
- break;
-
- case MESA_SHADER_TESS_CTRL:
- /* The primitive_mode field, while it exists for TCS, is not
- * populated (since separable shaders between TCS/TES are legal,
- * so TCS wouldn't have access to TES's declaration). Make a
- * guess so that we shader-db something plausible for TCS.
- */
- if (nir->info.outputs_written & VARYING_BIT_TESS_LEVEL_INNER)
- key.tessellation = IR3_TESS_TRIANGLES;
- else
- key.tessellation = IR3_TESS_ISOLINES;
- break;
-
- case MESA_SHADER_GEOMETRY:
- key.has_gs = true;
- break;
-
- default:
- break;
- }
-
- key.safe_constlen = false;
- struct ir3_shader_variant *v = ir3_shader_variant(shader, key, false, debug);
- if (!v)
- return;
-
- if (v->constlen > compiler->max_const_safe) {
- key.safe_constlen = true;
- ir3_shader_variant(shader, key, false, debug);
- }
-
- /* For vertex shaders, also compile initial binning pass shader: */
- if (nir->info.stage == MESA_SHADER_VERTEX) {
- key.safe_constlen = false;
- v = ir3_shader_variant(shader, key, true, debug);
- if (!v)
- return;
-
- if (v->constlen > compiler->max_const_safe) {
- key.safe_constlen = true;
- ir3_shader_variant(shader, key, true, debug);
- }
- }
-
- shader->initial_variants_done = true;
+ struct ir3_shader *shader = hwcso->shader;
+ struct ir3_compiler *compiler = shader->compiler;
+ nir_shader *nir = shader->nir;
+
+ /* Compile standard variants immediately to try to avoid draw-time stalls
+ * to run the compiler.
+ */
+ struct ir3_shader_key key = {
+ .tessellation = IR3_TESS_NONE,
+ .ucp_enables = MASK(nir->info.clip_distance_array_size),
+ .msaa = true,
+ };
+
+ switch (nir->info.stage) {
+ case MESA_SHADER_TESS_EVAL:
+ key.tessellation = ir3_tess_mode(nir->info.tess.primitive_mode);
+ break;
+
+ case MESA_SHADER_TESS_CTRL:
+ /* The primitive_mode field, while it exists for TCS, is not
+ * populated (since separable shaders between TCS/TES are legal,
+ * so TCS wouldn't have access to TES's declaration). Make a
+ * guess so that we shader-db something plausible for TCS.
+ */
+ if (nir->info.outputs_written & VARYING_BIT_TESS_LEVEL_INNER)
+ key.tessellation = IR3_TESS_TRIANGLES;
+ else
+ key.tessellation = IR3_TESS_ISOLINES;
+ break;
+
+ case MESA_SHADER_GEOMETRY:
+ key.has_gs = true;
+ break;
+
+ default:
+ break;
+ }
+
+ key.safe_constlen = false;
+ struct ir3_shader_variant *v = ir3_shader_variant(shader, key, false, debug);
+ if (!v)
+ return;
+
+ if (v->constlen > compiler->max_const_safe) {
+ key.safe_constlen = true;
+ ir3_shader_variant(shader, key, false, debug);
+ }
+
+ /* For vertex shaders, also compile initial binning pass shader: */
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ key.safe_constlen = false;
+ v = ir3_shader_variant(shader, key, true, debug);
+ if (!v)
+ return;
+
+ if (v->constlen > compiler->max_const_safe) {
+ key.safe_constlen = true;
+ ir3_shader_variant(shader, key, true, debug);
+ }
+ }
+
+ shader->initial_variants_done = true;
}
static void
create_initial_variants_async(void *job, int thread_index)
{
- struct ir3_shader_state *hwcso = job;
- struct pipe_debug_callback debug = {};
+ struct ir3_shader_state *hwcso = job;
+ struct pipe_debug_callback debug = {};
- create_initial_variants(hwcso, &debug);
+ create_initial_variants(hwcso, &debug);
}
static void
create_initial_compute_variants_async(void *job, int thread_index)
{
- struct ir3_shader_state *hwcso = job;
- struct ir3_shader *shader = hwcso->shader;
- struct pipe_debug_callback debug = {};
- static struct ir3_shader_key key; /* static is implicitly zeroed */
+ struct ir3_shader_state *hwcso = job;
+ struct ir3_shader *shader = hwcso->shader;
+ struct pipe_debug_callback debug = {};
+ static struct ir3_shader_key key; /* static is implicitly zeroed */
- ir3_shader_variant(shader, key, false, &debug);
- shader->initial_variants_done = true;
+ ir3_shader_variant(shader, key, false, &debug);
+ shader->initial_variants_done = true;
}
/* a bit annoying that compute-shader and normal shader state objects
*/
void *
ir3_shader_compute_state_create(struct pipe_context *pctx,
- const struct pipe_compute_state *cso)
+ const struct pipe_compute_state *cso)
{
- struct fd_context *ctx = fd_context(pctx);
-
- /* req_input_mem will only be non-zero for cl kernels (ie. clover).
- * This isn't a perfect test because I guess it is possible (but
- * uncommon) for none for the kernel parameters to be a global,
- * but ctx->set_global_bindings() can't fail, so this is the next
- * best place to fail if we need a newer version of kernel driver:
- */
- if ((cso->req_input_mem > 0) &&
- fd_device_version(ctx->dev) < FD_VERSION_BO_IOVA) {
- return NULL;
- }
-
- struct ir3_compiler *compiler = ctx->screen->compiler;
- nir_shader *nir;
-
- if (cso->ir_type == PIPE_SHADER_IR_NIR) {
- /* we take ownership of the reference: */
- nir = (nir_shader *)cso->prog;
- } else {
- debug_assert(cso->ir_type == PIPE_SHADER_IR_TGSI);
- if (ir3_shader_debug & IR3_DBG_DISASM) {
- tgsi_dump(cso->prog, 0);
- }
- nir = tgsi_to_nir(cso->prog, pctx->screen, false);
- }
-
- struct ir3_shader *shader = ir3_shader_from_nir(compiler, nir, 0, NULL);
- struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso));
-
- util_queue_fence_init(&hwcso->ready);
- hwcso->shader = shader;
-
- /* Immediately compile a standard variant. We have so few variants in our
- * shaders, that doing so almost eliminates draw-time recompiles. (This
- * is also how we get data from shader-db's ./run)
- */
-
- if (initial_variants_synchronous(ctx)) {
- static struct ir3_shader_key key; /* static is implicitly zeroed */
- ir3_shader_variant(shader, key, false, &ctx->debug);
- shader->initial_variants_done = true;
- } else {
- struct fd_screen *screen = ctx->screen;
- util_queue_add_job(&screen->compile_queue, hwcso,
- &hwcso->ready, create_initial_compute_variants_async,
- NULL, 0);
- }
-
- return hwcso;
+ struct fd_context *ctx = fd_context(pctx);
+
+ /* req_input_mem will only be non-zero for cl kernels (ie. clover).
+ * This isn't a perfect test because I guess it is possible (but
+ * uncommon) for none of the kernel parameters to be a global,
+ * but ctx->set_global_bindings() can't fail, so this is the next
+ * best place to fail if we need a newer version of kernel driver:
+ */
+ if ((cso->req_input_mem > 0) &&
+ fd_device_version(ctx->dev) < FD_VERSION_BO_IOVA) {
+ return NULL;
+ }
+
+ struct ir3_compiler *compiler = ctx->screen->compiler;
+ nir_shader *nir;
+
+ if (cso->ir_type == PIPE_SHADER_IR_NIR) {
+ /* we take ownership of the reference: */
+ nir = (nir_shader *)cso->prog;
+ } else {
+ debug_assert(cso->ir_type == PIPE_SHADER_IR_TGSI);
+ if (ir3_shader_debug & IR3_DBG_DISASM) {
+ tgsi_dump(cso->prog, 0);
+ }
+ nir = tgsi_to_nir(cso->prog, pctx->screen, false);
+ }
+
+ struct ir3_shader *shader = ir3_shader_from_nir(compiler, nir, 0, NULL);
+ struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso));
+
+ util_queue_fence_init(&hwcso->ready);
+ hwcso->shader = shader;
+
+ /* Immediately compile a standard variant. We have so few variants in our
+ * shaders, that doing so almost eliminates draw-time recompiles. (This
+ * is also how we get data from shader-db's ./run)
+ */
+
+ if (initial_variants_synchronous(ctx)) {
+ static struct ir3_shader_key key; /* static is implicitly zeroed */
+ ir3_shader_variant(shader, key, false, &ctx->debug);
+ shader->initial_variants_done = true;
+ } else {
+ struct fd_screen *screen = ctx->screen;
+ util_queue_add_job(&screen->compile_queue, hwcso, &hwcso->ready,
+ create_initial_compute_variants_async, NULL, 0);
+ }
+
+ return hwcso;
}
void *
-ir3_shader_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso)
+ir3_shader_state_create(struct pipe_context *pctx,
+ const struct pipe_shader_state *cso)
{
- struct fd_context *ctx = fd_context(pctx);
- struct ir3_compiler *compiler = ctx->screen->compiler;
- struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso));
-
- /*
- * Convert to nir (if necessary):
- */
-
- nir_shader *nir;
- if (cso->type == PIPE_SHADER_IR_NIR) {
- /* we take ownership of the reference: */
- nir = cso->ir.nir;
- } else {
- debug_assert(cso->type == PIPE_SHADER_IR_TGSI);
- if (ir3_shader_debug & IR3_DBG_DISASM) {
- tgsi_dump(cso->tokens, 0);
- }
- nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
- }
-
- /*
- * Create ir3_shader:
- *
- * This part is cheap, it doesn't compile initial variants
- */
-
- struct ir3_stream_output_info stream_output = {};
- copy_stream_out(&stream_output, &cso->stream_output);
-
- hwcso->shader = ir3_shader_from_nir(compiler, nir, 0, &stream_output);
-
- /*
- * Create initial variants to avoid draw-time stalls. This is
- * normally done asynchronously, unless debug is enabled (which
- * will be the case for shader-db)
- */
-
- util_queue_fence_init(&hwcso->ready);
-
- if (initial_variants_synchronous(ctx)) {
- create_initial_variants(hwcso, &ctx->debug);
- } else {
- util_queue_add_job(&ctx->screen->compile_queue, hwcso,
- &hwcso->ready, create_initial_variants_async,
- NULL, 0);
- }
-
- return hwcso;
+ struct fd_context *ctx = fd_context(pctx);
+ struct ir3_compiler *compiler = ctx->screen->compiler;
+ struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso));
+
+ /*
+ * Convert to nir (if necessary):
+ */
+
+ nir_shader *nir;
+ if (cso->type == PIPE_SHADER_IR_NIR) {
+ /* we take ownership of the reference: */
+ nir = cso->ir.nir;
+ } else {
+ debug_assert(cso->type == PIPE_SHADER_IR_TGSI);
+ if (ir3_shader_debug & IR3_DBG_DISASM) {
+ tgsi_dump(cso->tokens, 0);
+ }
+ nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
+ }
+
+ /*
+ * Create ir3_shader:
+ *
+ * This part is cheap, it doesn't compile initial variants
+ */
+
+ struct ir3_stream_output_info stream_output = {};
+ copy_stream_out(&stream_output, &cso->stream_output);
+
+ hwcso->shader = ir3_shader_from_nir(compiler, nir, 0, &stream_output);
+
+ /*
+ * Create initial variants to avoid draw-time stalls. This is
+ * normally done asynchronously, unless debug is enabled (which
+ * will be the case for shader-db)
+ */
+
+ util_queue_fence_init(&hwcso->ready);
+
+ if (initial_variants_synchronous(ctx)) {
+ create_initial_variants(hwcso, &ctx->debug);
+ } else {
+ util_queue_add_job(&ctx->screen->compile_queue, hwcso, &hwcso->ready,
+ create_initial_variants_async, NULL, 0);
+ }
+
+ return hwcso;
}
void
ir3_shader_state_delete(struct pipe_context *pctx, void *_hwcso)
{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_screen *screen = ctx->screen;
- struct ir3_shader_state *hwcso = _hwcso;
- struct ir3_shader *so = hwcso->shader;
-
- ir3_cache_invalidate(ctx->shader_cache, hwcso);
-
- /* util_queue_drop_job() guarantees that either:
- * 1) job did not execute
- * 2) job completed
- *
- * In either case the fence is signaled
- */
- util_queue_drop_job(&screen->compile_queue, &hwcso->ready);
-
- /* free the uploaded shaders, since this is handled outside of the
- * shared ir3 code (ie. not used by turnip):
- */
- for (struct ir3_shader_variant *v = so->variants; v; v = v->next) {
- fd_bo_del(v->bo);
- v->bo = NULL;
-
- if (v->binning && v->binning->bo) {
- fd_bo_del(v->binning->bo);
- v->binning->bo = NULL;
- }
- }
-
- ir3_shader_destroy(so);
- util_queue_fence_destroy(&hwcso->ready);
- free(hwcso);
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_screen *screen = ctx->screen;
+ struct ir3_shader_state *hwcso = _hwcso;
+ struct ir3_shader *so = hwcso->shader;
+
+ ir3_cache_invalidate(ctx->shader_cache, hwcso);
+
+ /* util_queue_drop_job() guarantees that either:
+ * 1) job did not execute
+ * 2) job completed
+ *
+ * In either case the fence is signaled
+ */
+ util_queue_drop_job(&screen->compile_queue, &hwcso->ready);
+
+ /* free the uploaded shaders, since this is handled outside of the
+ * shared ir3 code (ie. not used by turnip):
+ */
+ for (struct ir3_shader_variant *v = so->variants; v; v = v->next) {
+ fd_bo_del(v->bo);
+ v->bo = NULL;
+
+ if (v->binning && v->binning->bo) {
+ fd_bo_del(v->binning->bo);
+ v->binning->bo = NULL;
+ }
+ }
+
+ ir3_shader_destroy(so);
+ util_queue_fence_destroy(&hwcso->ready);
+ free(hwcso);
}
struct ir3_shader *
ir3_get_shader(struct ir3_shader_state *hwcso)
{
- if (!hwcso)
- return NULL;
-
- struct ir3_shader *shader = hwcso->shader;
- perf_time(1000, "waited for %s:%s:%s variants",
- _mesa_shader_stage_to_abbrev(shader->type),
- shader->nir->info.name, shader->nir->info.label) {
- /* wait for initial variants to compile: */
- util_queue_fence_wait(&hwcso->ready);
- }
-
- return shader;
+ if (!hwcso)
+ return NULL;
+
+ struct ir3_shader *shader = hwcso->shader;
+ perf_time(1000, "waited for %s:%s:%s variants",
+ _mesa_shader_stage_to_abbrev(shader->type), shader->nir->info.name,
+ shader->nir->info.label)
+ {
+ /* wait for initial variants to compile: */
+ util_queue_fence_wait(&hwcso->ready);
+ }
+
+ return shader;
}
struct shader_info *
ir3_get_shader_info(struct ir3_shader_state *hwcso)
{
- if (!hwcso)
- return NULL;
- return &hwcso->shader->nir->info;
+ if (!hwcso)
+ return NULL;
+ return &hwcso->shader->nir->info;
}
/* fixup dirty shader state in case some "unrelated" (from the state-
void
ir3_fixup_shader_state(struct pipe_context *pctx, struct ir3_shader_key *key)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(pctx);
- if (!ir3_shader_key_equal(ctx->last.key, key)) {
- if (ir3_shader_key_changes_fs(ctx->last.key, key)) {
- fd_context_dirty_shader(ctx, PIPE_SHADER_FRAGMENT, FD_DIRTY_SHADER_PROG);
- }
+ if (!ir3_shader_key_equal(ctx->last.key, key)) {
+ if (ir3_shader_key_changes_fs(ctx->last.key, key)) {
+ fd_context_dirty_shader(ctx, PIPE_SHADER_FRAGMENT,
+ FD_DIRTY_SHADER_PROG);
+ }
- if (ir3_shader_key_changes_vs(ctx->last.key, key)) {
- fd_context_dirty_shader(ctx, PIPE_SHADER_VERTEX, FD_DIRTY_SHADER_PROG);
- }
+ if (ir3_shader_key_changes_vs(ctx->last.key, key)) {
+ fd_context_dirty_shader(ctx, PIPE_SHADER_VERTEX, FD_DIRTY_SHADER_PROG);
+ }
- /* NOTE: currently only a6xx has gs/tess, but needs no
- * gs/tess specific lowering.
- */
+ /* NOTE: currently only a6xx has gs/tess, but needs no
+ * gs/tess specific lowering.
+ */
- *ctx->last.key = *key;
- }
+ *ctx->last.key = *key;
+ }
}
static void
ir3_screen_finalize_nir(struct pipe_screen *pscreen, void *nir, bool optimize)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- ir3_finalize_nir(screen->compiler, nir);
+ ir3_finalize_nir(screen->compiler, nir);
}
static void
-ir3_set_max_shader_compiler_threads(struct pipe_screen *pscreen, unsigned max_threads)
+ir3_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
+ unsigned max_threads)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- /* This function doesn't allow a greater number of threads than
- * the queue had at its creation.
- */
- util_queue_adjust_num_threads(&screen->compile_queue, max_threads);
+ /* This function doesn't allow a greater number of threads than
+ * the queue had at its creation.
+ */
+ util_queue_adjust_num_threads(&screen->compile_queue, max_threads);
}
static bool
ir3_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
- void *shader, enum pipe_shader_type shader_type)
+ void *shader,
+ enum pipe_shader_type shader_type)
{
- struct ir3_shader_state *hwcso = (struct ir3_shader_state *)shader;
+ struct ir3_shader_state *hwcso = (struct ir3_shader_state *)shader;
- return util_queue_fence_is_signalled(&hwcso->ready);
+ return util_queue_fence_is_signalled(&hwcso->ready);
}
void
ir3_prog_init(struct pipe_context *pctx)
{
- pctx->create_vs_state = ir3_shader_state_create;
- pctx->delete_vs_state = ir3_shader_state_delete;
+ pctx->create_vs_state = ir3_shader_state_create;
+ pctx->delete_vs_state = ir3_shader_state_delete;
- pctx->create_tcs_state = ir3_shader_state_create;
- pctx->delete_tcs_state = ir3_shader_state_delete;
+ pctx->create_tcs_state = ir3_shader_state_create;
+ pctx->delete_tcs_state = ir3_shader_state_delete;
- pctx->create_tes_state = ir3_shader_state_create;
- pctx->delete_tes_state = ir3_shader_state_delete;
+ pctx->create_tes_state = ir3_shader_state_create;
+ pctx->delete_tes_state = ir3_shader_state_delete;
- pctx->create_gs_state = ir3_shader_state_create;
- pctx->delete_gs_state = ir3_shader_state_delete;
+ pctx->create_gs_state = ir3_shader_state_create;
+ pctx->delete_gs_state = ir3_shader_state_delete;
- pctx->create_fs_state = ir3_shader_state_create;
- pctx->delete_fs_state = ir3_shader_state_delete;
+ pctx->create_fs_state = ir3_shader_state_create;
+ pctx->delete_fs_state = ir3_shader_state_delete;
}
void
ir3_screen_init(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
-
- screen->compiler = ir3_compiler_create(screen->dev, screen->gpu_id);
-
- /* TODO do we want to limit things to # of fast cores, or just limit
- * based on total # of both big and little cores. The little cores
- * tend to be in-order and probably much slower for compiling than
- * big cores. OTOH if they are sitting idle, maybe it is useful to
- * use them?
- */
- unsigned num_threads = sysconf(_SC_NPROCESSORS_ONLN) - 1;
-
- util_queue_init(&screen->compile_queue, "ir3q", 64, num_threads,
- UTIL_QUEUE_INIT_RESIZE_IF_FULL |
- UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY);
-
- pscreen->finalize_nir = ir3_screen_finalize_nir;
- pscreen->set_max_shader_compiler_threads =
- ir3_set_max_shader_compiler_threads;
- pscreen->is_parallel_shader_compilation_finished =
- ir3_is_parallel_shader_compilation_finished;
+ struct fd_screen *screen = fd_screen(pscreen);
+
+ screen->compiler = ir3_compiler_create(screen->dev, screen->gpu_id);
+
+ /* TODO do we want to limit things to # of fast cores, or just limit
+ * based on total # of both big and little cores. The little cores
+ * tend to be in-order and probably much slower for compiling than
+ * big cores. OTOH if they are sitting idle, maybe it is useful to
+ * use them?
+ */
+ unsigned num_threads = sysconf(_SC_NPROCESSORS_ONLN) - 1;
+
+ util_queue_init(&screen->compile_queue, "ir3q", 64, num_threads,
+ UTIL_QUEUE_INIT_RESIZE_IF_FULL |
+ UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY);
+
+ pscreen->finalize_nir = ir3_screen_finalize_nir;
+ pscreen->set_max_shader_compiler_threads =
+ ir3_set_max_shader_compiler_threads;
+ pscreen->is_parallel_shader_compilation_finished =
+ ir3_is_parallel_shader_compilation_finished;
}
void
ir3_screen_fini(struct pipe_screen *pscreen)
{
- struct fd_screen *screen = fd_screen(pscreen);
+ struct fd_screen *screen = fd_screen(pscreen);
- util_queue_destroy(&screen->compile_queue);
- ir3_compiler_destroy(screen->compiler);
- screen->compiler = NULL;
+ util_queue_destroy(&screen->compile_queue);
+ ir3_compiler_destroy(screen->compiler);
+ screen->compiler = NULL;
}
void
-ir3_update_max_tf_vtx(struct fd_context *ctx, const struct ir3_shader_variant *v)
+ir3_update_max_tf_vtx(struct fd_context *ctx,
+ const struct ir3_shader_variant *v)
{
- struct fd_streamout_stateobj *so = &ctx->streamout;
- struct ir3_stream_output_info *info = &v->shader->stream_output;
- uint32_t maxvtxcnt = 0x7fffffff;
-
- if (v->shader->stream_output.num_outputs == 0)
- ctx->streamout.max_tf_vtx = 0;
- if (so->num_targets == 0)
- ctx->streamout.max_tf_vtx = 0;
-
- /* offset to write to is:
- *
- * total_vtxcnt = vtxcnt + offsets[i]
- * offset = total_vtxcnt * stride[i]
- *
- * offset = vtxcnt * stride[i] ; calculated in shader
- * + offsets[i] * stride[i] ; calculated at emit_tfbos()
- *
- * assuming for each vtx, each target buffer will have data written
- * up to 'offset + stride[i]', that leaves maxvtxcnt as:
- *
- * buffer_size = (maxvtxcnt * stride[i]) + stride[i]
- * maxvtxcnt = (buffer_size - stride[i]) / stride[i]
- *
- * but shader is actually doing a less-than (rather than less-than-
- * equal) check, so we can drop the -stride[i].
- *
- * TODO is assumption about `offset + stride[i]` legit?
- */
- for (unsigned i = 0; i < so->num_targets; i++) {
- struct pipe_stream_output_target *target = so->targets[i];
- unsigned stride = info->stride[i] * 4; /* convert dwords->bytes */
- if (target) {
- uint32_t max = target->buffer_size / stride;
- maxvtxcnt = MIN2(maxvtxcnt, max);
- }
- }
-
- ctx->streamout.max_tf_vtx = maxvtxcnt;
+ struct fd_streamout_stateobj *so = &ctx->streamout;
+ struct ir3_stream_output_info *info = &v->shader->stream_output;
+ uint32_t maxvtxcnt = 0x7fffffff;
+
+ if (v->shader->stream_output.num_outputs == 0)
+ maxvtxcnt = 0;
+ if (so->num_targets == 0)
+ maxvtxcnt = 0;
+
+ /* offset to write to is:
+ *
+ * total_vtxcnt = vtxcnt + offsets[i]
+ * offset = total_vtxcnt * stride[i]
+ *
+ * offset = vtxcnt * stride[i] ; calculated in shader
+ * + offsets[i] * stride[i] ; calculated at emit_tfbos()
+ *
+ * assuming for each vtx, each target buffer will have data written
+ * up to 'offset + stride[i]', that leaves maxvtxcnt as:
+ *
+ * buffer_size = (maxvtxcnt * stride[i]) + stride[i]
+ * maxvtxcnt = (buffer_size - stride[i]) / stride[i]
+ *
+ * but shader is actually doing a less-than (rather than less-than-
+ * equal) check, so we can drop the -stride[i].
+ *
+ * TODO is assumption about `offset + stride[i]` legit?
+ */
+ for (unsigned i = 0; i < so->num_targets; i++) {
+ struct pipe_stream_output_target *target = so->targets[i];
+ unsigned stride = info->stride[i] * 4; /* convert dwords->bytes */
+ if (target) {
+ uint32_t max = target->buffer_size / stride;
+ maxvtxcnt = MIN2(maxvtxcnt, max);
+ }
+ }
+
+ ctx->streamout.max_tf_vtx = maxvtxcnt;
}
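
One worked instance of the maxvtxcnt derivation in the comment above, for a single target:

/* stride = info->stride[i] * 4 = 4 dwords * 4 = 16 bytes per vertex
 * buffer_size = 1024 bytes
 * exact bound: (1024 - 16) / 16 = 63 vertices, but the shader uses a
 * strict '<' comparison, so dropping the -stride term is safe:
 * maxvtxcnt = 1024 / 16 = 64
 */
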
#ifndef IR3_GALLIUM_H_
#define IR3_GALLIUM_H_
-#include "pipe/p_state.h"
-#include "pipe/p_screen.h"
#include "ir3/ir3_shader.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
#include "freedreno_util.h"
*/
struct ir3_shader_state;
-struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
- struct ir3_shader_key key, bool binning_pass,
- struct pipe_debug_callback *debug);
+struct ir3_shader_variant *
+ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key,
+ bool binning_pass, struct pipe_debug_callback *debug);
-void * ir3_shader_compute_state_create(struct pipe_context *pctx,
- const struct pipe_compute_state *cso);
-void * ir3_shader_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso);
+void *ir3_shader_compute_state_create(struct pipe_context *pctx,
+ const struct pipe_compute_state *cso);
+void *ir3_shader_state_create(struct pipe_context *pctx,
+ const struct pipe_shader_state *cso);
void ir3_shader_state_delete(struct pipe_context *pctx, void *hwcso);
-struct ir3_shader * ir3_get_shader(struct ir3_shader_state *hwcso);
-struct shader_info * ir3_get_shader_info(struct ir3_shader_state *hwcso);
+struct ir3_shader *ir3_get_shader(struct ir3_shader_state *hwcso);
+struct shader_info *ir3_get_shader_info(struct ir3_shader_state *hwcso);
-void ir3_fixup_shader_state(struct pipe_context *pctx, struct ir3_shader_key *key) assert_dt;
+void ir3_fixup_shader_state(struct pipe_context *pctx,
+ struct ir3_shader_key *key) assert_dt;
void ir3_prog_init(struct pipe_context *pctx);
void ir3_screen_init(struct pipe_screen *pscreen);
*/
static inline bool
ir3_point_sprite(const struct ir3_shader_variant *fs, int i,
- uint32_t sprite_coord_enable, bool *coord_mode)
+ uint32_t sprite_coord_enable, bool *coord_mode)
{
- gl_varying_slot slot = fs->inputs[i].slot;
- switch (slot) {
- case VARYING_SLOT_PNTC:
- *coord_mode = true;
- return true;
- case VARYING_SLOT_TEX0 ... VARYING_SLOT_TEX7:
- return !!(sprite_coord_enable & BITFIELD_BIT(slot - VARYING_SLOT_TEX0));
- default:
- return false;
- }
+ gl_varying_slot slot = fs->inputs[i].slot;
+ switch (slot) {
+ case VARYING_SLOT_PNTC:
+ *coord_mode = true;
+ return true;
+ case VARYING_SLOT_TEX0 ... VARYING_SLOT_TEX7:
+ return !!(sprite_coord_enable & BITFIELD_BIT(slot - VARYING_SLOT_TEX0));
+ default:
+ return false;
+ }
}
-void ir3_update_max_tf_vtx(struct fd_context *ctx, const struct ir3_shader_variant *v) assert_dt;
+void ir3_update_max_tf_vtx(struct fd_context *ctx,
+ const struct ir3_shader_variant *v) assert_dt;
#endif /* IR3_GALLIUM_H_ */