freedreno/a5xx: initial support
author Rob Clark <robdclark@gmail.com>
Tue, 8 Nov 2016 15:50:03 +0000 (10:50 -0500)
committer Rob Clark <robdclark@gmail.com>
Wed, 30 Nov 2016 17:35:49 +0000 (12:35 -0500)
Signed-off-by: Rob Clark <robdclark@gmail.com>
34 files changed:
configure.ac
src/gallium/drivers/freedreno/Makefile.am
src/gallium/drivers/freedreno/Makefile.sources
src/gallium/drivers/freedreno/a5xx/fd5_blend.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_blend.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_context.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_context.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_draw.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_draw.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_emit.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_emit.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_format.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_format.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_gmem.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_gmem.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_program.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_program.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_query.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_query.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_screen.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_screen.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_texture.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_texture.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_zsa.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_zsa.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_batch.c
src/gallium/drivers/freedreno/freedreno_batch.h
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_gmem.c
src/gallium/drivers/freedreno/freedreno_screen.c
src/gallium/drivers/freedreno/freedreno_screen.h
src/gallium/drivers/freedreno/freedreno_util.h

index e7e5628..f62bc61 100644 (file)
@@ -74,7 +74,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63
 LIBDRM_INTEL_REQUIRED=2.4.61
 LIBDRM_NVVIEUX_REQUIRED=2.4.66
 LIBDRM_NOUVEAU_REQUIRED=2.4.66
-LIBDRM_FREEDRENO_REQUIRED=2.4.68
+LIBDRM_FREEDRENO_REQUIRED=2.4.74
 LIBDRM_VC4_REQUIRED=2.4.69
 DRI2PROTO_REQUIRED=2.6
 DRI3PROTO_REQUIRED=1.0
index 148dd0e..e5c344d 100644 (file)
@@ -20,6 +20,7 @@ libfreedreno_la_SOURCES = \
        $(a2xx_SOURCES) \
        $(a3xx_SOURCES) \
        $(a4xx_SOURCES) \
+       $(a5xx_SOURCES) \
        $(ir3_SOURCES) \
        $(ir3_GENERATED_FILES)
 
index 92d9186..b53a23e 100644 (file)
@@ -120,6 +120,33 @@ a4xx_SOURCES := \
        a4xx/fd4_zsa.c \
        a4xx/fd4_zsa.h
 
+a5xx_SOURCES := \
+       a5xx/a5xx.xml.h \
+       a5xx/fd5_blend.c \
+       a5xx/fd5_blend.h \
+       a5xx/fd5_context.c \
+       a5xx/fd5_context.h \
+       a5xx/fd5_draw.c \
+       a5xx/fd5_draw.h \
+       a5xx/fd5_emit.c \
+       a5xx/fd5_emit.h \
+       a5xx/fd5_format.c \
+       a5xx/fd5_format.h \
+       a5xx/fd5_gmem.c \
+       a5xx/fd5_gmem.h \
+       a5xx/fd5_program.c \
+       a5xx/fd5_program.h \
+       a5xx/fd5_query.c \
+       a5xx/fd5_query.h \
+       a5xx/fd5_rasterizer.c \
+       a5xx/fd5_rasterizer.h \
+       a5xx/fd5_screen.c \
+       a5xx/fd5_screen.h \
+       a5xx/fd5_texture.c \
+       a5xx/fd5_texture.h \
+       a5xx/fd5_zsa.c \
+       a5xx/fd5_zsa.h
+
 ir3_SOURCES := \
        ir3/disasm-a3xx.c \
        ir3/instr-a3xx.h \
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blend.c b/src/gallium/drivers/freedreno/a5xx/fd5_blend.c
new file mode 100644 (file)
index 0000000..e5107a7
--- /dev/null
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_blend.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd5_blend.h"
+#include "fd5_context.h"
+#include "fd5_format.h"
+
+// XXX move somewhere common.. same across a3xx/a4xx/a5xx..
+/*
+ * Translate a gallium PIPE_BLEND_* equation into the blend opcode that is
+ * packed into the RB_MRT_BLEND_CONTROL fields.
+ *
+ * Unrecognized values are reported through DBG() and mapped to opcode 0.
+ */
+static enum a3xx_rb_blend_opcode
+blend_func(unsigned func)
+{
+       switch (func) {
+       case PIPE_BLEND_ADD:
+               return BLEND_DST_PLUS_SRC;
+       case PIPE_BLEND_MIN:
+               return BLEND_MIN_DST_SRC;
+       case PIPE_BLEND_MAX:
+               return BLEND_MAX_DST_SRC;
+       case PIPE_BLEND_SUBTRACT:
+               return BLEND_SRC_MINUS_DST;
+       case PIPE_BLEND_REVERSE_SUBTRACT:
+               return BLEND_DST_MINUS_SRC;
+       default:
+               /* not a blend equation handled above */
+               DBG("invalid blend func: %x", func);
+               return 0;
+       }
+}
+
+/*
+ * Build the a5xx blend state object for a gallium blend CSO.
+ *
+ * For every entry of rb_mrt[] this precomputes the MRT control word and the
+ * rgb / alpha / no-alpha-rgb blend control words, and it precomputes the
+ * global rb_blend_cntl word.  When cso->independent_blend_enable is not set,
+ * every render target takes its settings from cso->rt[0].
+ *
+ * Returns the new object (allocated with CALLOC_STRUCT), or NULL if the
+ * allocation fails.  pctx is not used by the visible body.
+ */
+void *
+fd5_blend_state_create(struct pipe_context *pctx,
+               const struct pipe_blend_state *cso)
+{
+       struct fd5_blend_stateobj *so;
+//     enum a3xx_rop_code rop = ROP_COPY;
+       bool reads_dest = false;
+       unsigned i, mrt_blend = 0;
+
+       if (cso->logicop_enable) {
+//             rop = cso->logicop_func;  /* maps 1:1 */
+
+               /* the logic ops listed here are flagged as reading the
+                * destination; any other op leaves reads_dest false
+                */
+               switch (cso->logicop_func) {
+               case PIPE_LOGICOP_NOR:
+               case PIPE_LOGICOP_AND_INVERTED:
+               case PIPE_LOGICOP_AND_REVERSE:
+               case PIPE_LOGICOP_INVERT:
+               case PIPE_LOGICOP_XOR:
+               case PIPE_LOGICOP_NAND:
+               case PIPE_LOGICOP_AND:
+               case PIPE_LOGICOP_EQUIV:
+               case PIPE_LOGICOP_NOOP:
+               case PIPE_LOGICOP_OR_INVERTED:
+               case PIPE_LOGICOP_OR_REVERSE:
+               case PIPE_LOGICOP_OR:
+                       reads_dest = true;
+                       break;
+               }
+       }
+
+       so = CALLOC_STRUCT(fd5_blend_stateobj);
+       if (!so)
+               return NULL;
+
+       /* keep a copy of the gallium state alongside the packed words */
+       so->base = *cso;
+
+       for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
+               const struct pipe_rt_blend_state *rt;
+
+               /* without independent blend, rt[0] applies to every MRT */
+               if (cso->independent_blend_enable)
+                       rt = &cso->rt[i];
+               else
+                       rt = &cso->rt[0];
+
+               so->rb_mrt[i].blend_control_rgb =
+                               A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
+                               A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
+                               A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor));
+
+               so->rb_mrt[i].blend_control_alpha =
+                               A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
+                               A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
+                               A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
+
+               /* same as blend_control_rgb, but with both factors passed
+                * through util_blend_dst_alpha_to_one() first
+                */
+               so->rb_mrt[i].blend_control_no_alpha_rgb =
+                               A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) |
+                               A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
+                               A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor)));
+
+
+               so->rb_mrt[i].control =
+//                             A5XX_RB_MRT_CONTROL_ROP_CODE(rop) |
+//                             COND(cso->logicop_enable, A5XX_RB_MRT_CONTROL_ROP_ENABLE) |
+                               0x60 | /* XXX set other than RECTLIST clear blits?? */
+                               A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
+
+               if (rt->blend_enable) {
+                       so->rb_mrt[i].control |=
+//                                     A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
+                                       A5XX_RB_MRT_CONTROL_BLEND |
+                                       A5XX_RB_MRT_CONTROL_BLEND2;
+                       mrt_blend |= (1 << i);
+               }
+
+               /* a destination-reading logic op also sets this MRT's bit in
+                * the mask later passed to RB_BLEND_CNTL_ENABLE_BLEND()
+                */
+               if (reads_dest) {
+//                     so->rb_mrt[i].control |= A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
+                       mrt_blend |= (1 << i);
+               }
+
+//             if (cso->dither)
+//                     so->rb_mrt[i].buf_info |= A5XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
+       }
+
+       /* mrt_blend holds one bit per render target index */
+       so->rb_blend_cntl = A5XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) |
+               COND(cso->independent_blend_enable, A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND);
+
+       return so;
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blend.h b/src/gallium/drivers/freedreno/a5xx/fd5_blend.h
new file mode 100644 (file)
index 0000000..85c6158
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_BLEND_H_
+#define FD5_BLEND_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+#include "freedreno_util.h"
+
+/*
+ * a5xx blend state: the gallium blend CSO plus register words that are
+ * precomputed once by fd5_blend_state_create().
+ */
+struct fd5_blend_stateobj {
+       /* copy of the gallium state; kept as the first member so that
+        * fd5_blend_stateobj() can cast a pipe_blend_state pointer back
+        */
+       struct pipe_blend_state base;
+
+       /* per render-target words, indexed by MRT number */
+       struct {
+               /* MRT control word (component enable, blend enable bits) */
+               uint32_t control;
+               /* NOTE(review): never written by fd5_blend_state_create()
+                * other than in commented-out dither code -- confirm use
+                */
+               uint32_t buf_info;
+               /* Blend control bits for color if there is an alpha channel */
+               uint32_t blend_control_rgb;
+               /* Blend control bits for color if there is no alpha channel */
+               uint32_t blend_control_no_alpha_rgb;
+               /* Blend control bits for alpha channel */
+               uint32_t blend_control_alpha;
+       } rb_mrt[A5XX_MAX_RENDER_TARGETS];
+       /* global blend control word (per-MRT enable mask, independent blend) */
+       uint32_t rb_blend_cntl;
+};
+
+/* Downcast a gallium blend state pointer to the a5xx state object that
+ * embeds it as its first member ("base").
+ */
+static inline struct fd5_blend_stateobj *
+fd5_blend_stateobj(struct pipe_blend_state *blend)
+{
+       struct fd5_blend_stateobj *so = (struct fd5_blend_stateobj *)blend;
+
+       return so;
+}
+
+void * fd5_blend_state_create(struct pipe_context *pctx,
+               const struct pipe_blend_state *cso);
+
+#endif /* FD5_BLEND_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_context.c b/src/gallium/drivers/freedreno/a5xx/fd5_context.c
new file mode 100644 (file)
index 0000000..1e4e83c
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "fd5_context.h"
+#include "fd5_blend.h"
+#include "fd5_draw.h"
+#include "fd5_emit.h"
+#include "fd5_gmem.h"
+#include "fd5_program.h"
+#include "fd5_query.h"
+#include "fd5_rasterizer.h"
+#include "fd5_texture.h"
+#include "fd5_zsa.h"
+
+/*
+ * pipe_context::destroy hook for a5xx contexts.
+ *
+ * Releases the four buffer objects and the border-color uploader that
+ * fd5_context_create() set up, cleans up the common VBOs, and finally
+ * calls fd_context_destroy().
+ *
+ * NOTE(review): fd5_context::border_color_buf is not released here --
+ * confirm whether it can still hold a resource reference at this point.
+ */
+static void
+fd5_context_destroy(struct pipe_context *pctx)
+{
+       struct fd5_context *fd5_ctx = fd5_context(fd_context(pctx));
+
+       fd_bo_del(fd5_ctx->vs_pvt_mem);
+       fd_bo_del(fd5_ctx->fs_pvt_mem);
+       fd_bo_del(fd5_ctx->vsc_size_mem);
+       fd_bo_del(fd5_ctx->blit_mem);
+
+       fd_context_cleanup_common_vbos(&fd5_ctx->base);
+
+       u_upload_destroy(fd5_ctx->border_color_uploader);
+
+       /* last: fd5_ctx must not be touched after this call (presumably it
+        * frees the context -- TODO confirm)
+        */
+       fd_context_destroy(pctx);
+}
+
+/*
+ * Lookup table from gallium PIPE_PRIM_* to the DI_PT_* primitive type,
+ * handed to fd_context_init() and indexed by pipe_draw_info::mode in
+ * draw_impl().  Modes without an entry here are zero-initialized.
+ */
+static const uint8_t primtypes[] = {
+               [PIPE_PRIM_POINTS]         = DI_PT_POINTLIST,
+               [PIPE_PRIM_LINES]          = DI_PT_LINELIST,
+               [PIPE_PRIM_LINE_STRIP]     = DI_PT_LINESTRIP,
+               [PIPE_PRIM_LINE_LOOP]      = DI_PT_LINELOOP,
+               [PIPE_PRIM_TRIANGLES]      = DI_PT_TRILIST,
+               [PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
+               [PIPE_PRIM_TRIANGLE_FAN]   = DI_PT_TRIFAN,
+               [PIPE_PRIM_MAX]            = DI_PT_RECTLIST,  /* internal clear blits */
+};
+
+/*
+ * Create an a5xx gallium context for the given screen.
+ *
+ * Allocates the fd5_context, installs the a5xx-specific pipe_context hooks,
+ * runs the common fd_context_init(), and then allocates the buffer objects
+ * and the border-color uploader that fd5_context_destroy() releases.
+ *
+ * Returns the new pipe_context, or NULL if the context allocation or
+ * fd_context_init() fails.  The flags argument is not used by this body.
+ *
+ * NOTE(review): the results of fd_bo_new() and u_upload_create() are not
+ * checked here -- confirm that later users tolerate NULL.
+ */
+struct pipe_context *
+fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
+{
+       struct fd_screen *screen = fd_screen(pscreen);
+       struct fd5_context *fd5_ctx = CALLOC_STRUCT(fd5_context);
+       struct pipe_context *pctx;
+
+       if (!fd5_ctx)
+               return NULL;
+
+       pctx = &fd5_ctx->base.base;
+
+       fd5_ctx->base.dev = fd_device_ref(screen->dev);
+       fd5_ctx->base.screen = fd_screen(pscreen);
+
+       /* a5xx-specific hooks are installed before the common init runs */
+       pctx->destroy = fd5_context_destroy;
+       pctx->create_blend_state = fd5_blend_state_create;
+       pctx->create_rasterizer_state = fd5_rasterizer_state_create;
+       pctx->create_depth_stencil_alpha_state = fd5_zsa_state_create;
+
+       fd5_draw_init(pctx);
+       fd5_gmem_init(pctx);
+       fd5_texture_init(pctx);
+       fd5_prog_init(pctx);
+       fd5_emit_init(pctx);
+
+       /* NOTE(review): fd5_ctx is not freed on this failure path --
+        * presumably fd_context_init() tears the context down itself; verify
+        */
+       pctx = fd_context_init(&fd5_ctx->base, pscreen, primtypes, priv);
+       if (!pctx)
+               return NULL;
+
+       fd5_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+       fd5_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+       fd5_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+       fd5_ctx->blit_mem = fd_bo_new(screen->dev, 0x1000,
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+       fd_context_setup_common_vbos(&fd5_ctx->base);
+
+       fd5_query_context_init(pctx);
+
+       fd5_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
+                                                         PIPE_USAGE_STREAM);
+
+       return pctx;
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_context.h b/src/gallium/drivers/freedreno/a5xx/fd5_context.h
new file mode 100644 (file)
index 0000000..30a11d0
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_CONTEXT_H_
+#define FD5_CONTEXT_H_
+
+#include "util/u_upload_mgr.h"
+
+#include "freedreno_drmif.h"
+
+#include "freedreno_context.h"
+
+#include "ir3_shader.h"
+
+/*
+ * a5xx-specific context.  The generic fd_context is embedded as the first
+ * member so that fd5_context() can cast an fd_context pointer back.
+ */
+struct fd5_context {
+       struct fd_context base;
+
+       /* buffer objects allocated in fd5_context_create() */
+       struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
+
+       /* This only needs to be 4 * num_of_pipes bytes (i.e. 32 bytes).  We
+        * could combine it with another allocation.
+        *
+        * (upper area used as scratch bo.. see fd5_query)
+        *
+        * XXX remove if unneeded after binning r/e..
+        */
+       struct fd_bo *vsc_size_mem;
+
+       /* TODO not sure what this is for.. */
+       struct fd_bo *blit_mem;
+
+       struct u_upload_mgr *border_color_uploader;
+       struct pipe_resource *border_color_buf;
+
+       /* true if *any* bits are set in {v,f}saturate_{s,t,r} */
+       bool vsaturate, fsaturate;
+
+       /* bitmasks of samplers which need coords clamped for the vertex
+        * shader:
+        */
+       uint16_t vsaturate_s, vsaturate_t, vsaturate_r;
+
+       /* bitmasks of samplers which need coords clamped for the fragment
+        * shader:
+        */
+       uint16_t fsaturate_s, fsaturate_t, fsaturate_r;
+
+       /* bitmasks of samplers which need the astc srgb workaround: */
+       uint16_t vastc_srgb, fastc_srgb;
+
+       /* Some state changes require a different shader variant.  Keep
+        * track of this so we know when we need to re-emit shader state
+        * due to a variant change.  See fixup_shader_state().
+        */
+       struct ir3_shader_key last_key;
+};
+
+/* Downcast a generic freedreno context to the a5xx context that embeds it
+ * as its first member ("base").
+ */
+static inline struct fd5_context *
+fd5_context(struct fd_context *ctx)
+{
+       struct fd5_context *fd5_ctx = (struct fd5_context *)ctx;
+
+       return fd5_ctx;
+}
+
+struct pipe_context *
+fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
+
+#endif /* FD5_CONTEXT_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_draw.c b/src/gallium/drivers/freedreno/a5xx/fd5_draw.c
new file mode 100644 (file)
index 0000000..e6b42bf
--- /dev/null
@@ -0,0 +1,288 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+
+#include "fd5_draw.h"
+#include "fd5_context.h"
+#include "fd5_emit.h"
+#include "fd5_program.h"
+#include "fd5_format.h"
+#include "fd5_zsa.h"
+
+
+/*
+ * Emit one draw into the given ringbuffer.
+ *
+ * Emits the state described by emit, re-emits vertex buffers when the
+ * vertex buffer or vertex state is dirty, programs the index offset and
+ * primitive-restart registers from emit->info, and then issues the draw
+ * through fd5_draw_emit().
+ */
+static void
+draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               struct fd5_emit *emit)
+{
+       const struct pipe_draw_info *info = emit->info;
+       enum pc_di_primtype primtype = ctx->primtypes[info->mode];
+
+       fd5_emit_state(ctx, ring, emit);
+
+       if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
+               fd5_emit_vertex_bufs(ring, emit);
+
+       /* indexed draws use index_bias as the offset, others use start */
+       OUT_PKT4(ring, REG_A5XX_VFD_INDEX_OFFSET, 2);
+       OUT_RING(ring, info->indexed ? info->index_bias : info->start); /* VFD_INDEX_OFFSET */
+       OUT_RING(ring, info->start_instance);   /* ??? UNKNOWN_2209 */
+
+       /* 0xffffffff is written when primitive restart is disabled */
+       OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1);
+       OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
+                       info->restart_index : 0xffffffff);
+
+       /* points + psize -> spritelist: */
+       if (ctx->rasterizer->point_size_per_vertex &&
+                       fd5_emit_get_vp(emit)->writes_psize &&
+                       (info->mode == PIPE_PRIM_POINTS))
+               primtype = DI_PT_POINTLIST_PSIZE;
+
+       /* second argument is false for draws; fd5_clear() passes true */
+       fd5_emit_render_cntl(ctx, false);
+       fd5_draw_emit(ctx->batch, ring, primtype,
+                       emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
+                       info);
+}
+
+/* fixup dirty shader state in case some "unrelated" (from the state-
+ * tracker's perspective) state change causes us to switch to a
+ * different variant.
+ */
+static void
+fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
+{
+       struct fd5_context *fd5_ctx = fd5_context(ctx);
+       struct ir3_shader_key *prev = &fd5_ctx->last_key;
+       bool vp_changed = false;
+       bool fp_changed = false;
+
+       /* same key as the previous draw: no variant switch, nothing to do */
+       if (ir3_shader_key_equal(prev, key))
+               return;
+
+       /* per-sampler fields only matter if either key uses them */
+       if (prev->has_per_samp || key->has_per_samp) {
+               if ((prev->vsaturate_s != key->vsaturate_s) ||
+                               (prev->vsaturate_t != key->vsaturate_t) ||
+                               (prev->vsaturate_r != key->vsaturate_r) ||
+                               (prev->vastc_srgb != key->vastc_srgb))
+                       vp_changed = true;
+
+               if ((prev->fsaturate_s != key->fsaturate_s) ||
+                               (prev->fsaturate_t != key->fsaturate_t) ||
+                               (prev->fsaturate_r != key->fsaturate_r) ||
+                               (prev->fastc_srgb != key->fastc_srgb))
+                       fp_changed = true;
+       }
+
+       /* key fields that select a different vertex shader variant */
+       if (prev->vclamp_color != key->vclamp_color)
+               vp_changed = true;
+
+       /* key fields that select a different fragment shader variant */
+       if ((prev->fclamp_color != key->fclamp_color) ||
+                       (prev->color_two_side != key->color_two_side) ||
+                       (prev->half_precision != key->half_precision) ||
+                       (prev->rasterflat != key->rasterflat))
+               fp_changed = true;
+
+       /* user clip planes affect both stages */
+       if (prev->ucp_enables != key->ucp_enables) {
+               vp_changed = true;
+               fp_changed = true;
+       }
+
+       if (vp_changed)
+               ctx->dirty |= FD_SHADER_DIRTY_VP;
+
+       if (fp_changed)
+               ctx->dirty |= FD_SHADER_DIRTY_FP;
+
+       /* remember the key for the comparison on the next draw */
+       fd5_ctx->last_key = *key;
+}
+
+/*
+ * a5xx implementation of the fd_context draw_vbo hook.
+ *
+ * Builds an fd5_emit whose shader key is derived from the current
+ * rasterizer state and the per-sampler flags in the fd5_context, marks
+ * shader state dirty if that key differs from the previous draw, and emits
+ * the draw into the batch's draw ringbuffer.
+ *
+ * Returns false (without emitting anything) if the vertex or fragment
+ * shader variant cannot be obtained, true otherwise.
+ */
+static bool
+fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
+{
+       struct fd5_context *fd5_ctx = fd5_context(ctx);
+       struct fd5_emit emit = {
+               .debug = &ctx->debug,
+               .vtx  = &ctx->vtx,
+               .prog = &ctx->prog,
+               .info = info,
+               .key = {
+                       .color_two_side = ctx->rasterizer->light_twoside,
+                       .vclamp_color = ctx->rasterizer->clamp_vertex_color,
+                       .fclamp_color = ctx->rasterizer->clamp_fragment_color,
+                       .rasterflat = ctx->rasterizer->flatshade,
+                       .half_precision = ctx->in_blit &&
+                                       fd_half_precision(&ctx->batch->framebuffer),
+                       .ucp_enables = ctx->rasterizer->clip_plane_enable,
+                       .has_per_samp = (fd5_ctx->fsaturate || fd5_ctx->vsaturate ||
+                                       fd5_ctx->fastc_srgb || fd5_ctx->vastc_srgb),
+                       .vsaturate_s = fd5_ctx->vsaturate_s,
+                       .vsaturate_t = fd5_ctx->vsaturate_t,
+                       .vsaturate_r = fd5_ctx->vsaturate_r,
+                       .fsaturate_s = fd5_ctx->fsaturate_s,
+                       .fsaturate_t = fd5_ctx->fsaturate_t,
+                       .fsaturate_r = fd5_ctx->fsaturate_r,
+                       .vastc_srgb = fd5_ctx->vastc_srgb,
+                       .fastc_srgb = fd5_ctx->fastc_srgb,
+               },
+               .rasterflat = ctx->rasterizer->flatshade,
+               .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
+               .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
+       };
+
+       fixup_shader_state(ctx, &emit.key);
+
+       /* sampled after fixup_shader_state() so that any shader dirty bits
+        * it added are included
+        */
+       unsigned dirty = ctx->dirty;
+
+       /* do regular pass first, since that is more likely to fail compiling: */
+
+       if (!(fd5_emit_get_vp(&emit) && fd5_emit_get_fp(&emit)))
+               return false;
+
+       emit.key.binning_pass = false;
+       emit.dirty = dirty;
+
+       draw_impl(ctx, ctx->batch->draw, &emit);
+
+       /* the binning pass below is disabled in this initial a5xx support */
+//     /* and now binning pass: */
+//     emit.key.binning_pass = true;
+//     emit.dirty = dirty & ~(FD_DIRTY_BLEND);
+//     emit.vp = NULL;   /* we changed key so need to refetch vp */
+//     emit.fp = NULL;
+//     draw_impl(ctx, ctx->batch->binning, &emit);
+
+       return true;
+}
+
+/*
+ * a5xx implementation of the fd_context clear hook.
+ *
+ * Emits fast-clear blits into the batch's draw ringbuffer: one per color
+ * buffer selected in "buffers", and one for the depth/stencil buffer if
+ * depth and/or stencil clearing is requested and a zsbuf is bound.  The
+ * batch's max_scissor is grown to cover the current scissor first.
+ *
+ * buffers: bitmask of PIPE_CLEAR_* flags
+ * color:   clear color; only the float view (color->f) is read here
+ * depth, stencil: values packed with util_pack_z_stencil()
+ */
+static void
+fd5_clear(struct fd_context *ctx, unsigned buffers,
+               const union pipe_color_union *color, double depth, unsigned stencil)
+{
+       struct fd_ringbuffer *ring = ctx->batch->draw;
+       struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+       struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+
+       /* TODO handle scissor.. or fallback to slow-clear? */
+
+       ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
+       ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
+       ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
+       ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
+
+       fd5_emit_render_cntl(ctx, true);
+
+       if (buffers & PIPE_CLEAR_COLOR) {
+               for (int i = 0; i < pfb->nr_cbufs; i++) {
+                       union util_color uc = {0};
+
+                       /* skip unbound slots and buffers not being cleared */
+                       if (!pfb->cbufs[i])
+                               continue;
+
+                       if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
+                               continue;
+
+                       // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
+                       /* NOTE(review): f[] is only assigned inside the four
+                        * cases below and there is no default -- confirm
+                        * fd5_pipe2swap() cannot return any other value
+                        */
+                       float f[4];
+                       switch (fd5_pipe2swap(pfb->cbufs[i]->format)) {
+                       case WZYX:
+                               f[0] = color->f[0];
+                               f[1] = color->f[1];
+                               f[2] = color->f[2];
+                               f[3] = color->f[3];
+                               break;
+                       case WXYZ:
+                               f[2] = color->f[0];
+                               f[1] = color->f[1];
+                               f[0] = color->f[2];
+                               f[3] = color->f[3];
+                               break;
+                       case ZYXW:
+                               f[3] = color->f[0];
+                               f[0] = color->f[1];
+                               f[1] = color->f[2];
+                               f[2] = color->f[3];
+                               break;
+                       case XYZW:
+                               f[3] = color->f[0];
+                               f[2] = color->f[1];
+                               f[1] = color->f[2];
+                               f[0] = color->f[3];
+                               break;
+                       }
+                       util_pack_color(f, pfb->cbufs[i]->format, &uc);
+
+                       /* select color buffer i as the blit target */
+                       OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+                       OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i));
+
+                       OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+                       OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR |
+                                       A5XX_RB_CLEAR_CNTL_MASK(0xf));
+
+                       OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 4);
+                       OUT_RING(ring, uc.ui[0]);  /* RB_CLEAR_COLOR_DW0 */
+                       OUT_RING(ring, uc.ui[1]);  /* RB_CLEAR_COLOR_DW1 */
+                       OUT_RING(ring, uc.ui[2]);  /* RB_CLEAR_COLOR_DW2 */
+                       OUT_RING(ring, uc.ui[3]);  /* RB_CLEAR_COLOR_DW3 */
+
+                       fd5_emit_blit(ctx, ring);
+               }
+       }
+
+       if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+               uint32_t clear =
+                       util_pack_z_stencil(pfb->zsbuf->format, depth, stencil);
+               uint32_t mask = 0;
+
+               /* clear mask: bit 0 is set for depth, bit 1 for stencil */
+               if (buffers & PIPE_CLEAR_DEPTH)
+                       mask |= 0x1;
+
+               if (buffers & PIPE_CLEAR_STENCIL)
+                       mask |= 0x2;
+
+               OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+               OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_ZS));
+
+               OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+               OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR |
+                               A5XX_RB_CLEAR_CNTL_MASK(mask));
+
+               /* the packed z/s value goes in the first clear-color dword */
+               OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
+               OUT_RING(ring, clear);    /* RB_CLEAR_COLOR_DW0 */
+
+               fd5_emit_blit(ctx, ring);
+       }
+
+       /* disable fast clear to not interfere w/ gmem->mem, etc.. */
+       OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+       OUT_RING(ring, 0x00000000);   /* RB_CLEAR_CNTL */
+}
+
+/* Install the a5xx draw and clear entry points on the context. */
+void
+fd5_draw_init(struct pipe_context *pctx)
+{
+       struct fd_context *fd_ctx = fd_context(pctx);
+
+       fd_ctx->clear = fd5_clear;
+       fd_ctx->draw_vbo = fd5_draw_vbo;
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_draw.h b/src/gallium/drivers/freedreno/a5xx/fd5_draw.h
new file mode 100644 (file)
index 0000000..677bedf
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_DRAW_H_
+#define FD5_DRAW_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_draw.h"
+
+/* some bits in common w/ a4xx: */
+#include "a4xx/fd4_draw.h"
+
+void fd5_draw_init(struct pipe_context *pctx);
+
+/* Emit one CP_DRAW_INDX_OFFSET draw into ring, bracketed by debug markers.
+ *
+ * primtype, src_sel, idx_type and vismode are packed into the first dword
+ * via DRAW4().  When vismode is USE_VISIBILITY that dword is emitted with
+ * the vis mode left as zero and recorded in batch->draw_patches so it can
+ * be patched later.  idx_buffer may be NULL (non-indexed draw), in which
+ * case idx_size and idx_offset are not emitted.
+ */
+static inline void
+fd5_draw(struct fd_batch *batch, struct fd_ringbuffer *ring,
+               enum pc_di_primtype primtype,
+               enum pc_di_vis_cull_mode vismode,
+               enum pc_di_src_sel src_sel, uint32_t count,
+               uint32_t instances, enum a4xx_index_size idx_type,
+               uint32_t idx_size, uint32_t idx_offset,
+               struct pipe_resource *idx_buffer)
+{
+       /* for debug after a lock up, write a unique counter value
+        * to scratch7 for each draw, to make it easier to match up
+        * register dumps to cmdstream.  The combination of IB
+        * (scratch6) and DRAW is enough to "triangulate" the
+        * particular draw that caused lockup.
+        */
+       emit_marker5(ring, 7);
+
+       /* Payload size: DRAW4 + NumInstances + NumIndices = 3 dwords.  An
+        * indexed draw adds one unknown dword, the index buffer address
+        * (OUT_RELOC is budgeted as two dwords by the other a5xx packets in
+        * this driver, e.g. VFD_FETCH) and the index size, for 7 in total.
+        */
+       OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 7 : 3);
+       if (vismode == USE_VISIBILITY) {
+               /* leave vis mode blank for now, it will be patched up when
+                * we know if we are binning or not
+                */
+               OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
+                               &batch->draw_patches);
+       } else {
+               OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
+       }
+       OUT_RING(ring, instances);         /* NumInstances */
+       OUT_RING(ring, count);             /* NumIndices */
+       if (idx_buffer) {
+               OUT_RING(ring, 0x0);           /* XXX */
+               OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
+               OUT_RING (ring, idx_size);
+       }
+
+       emit_marker5(ring, 7);
+
+       fd_reset_wfi(batch);
+}
+
+/* Translate a gallium pipe_draw_info into the arguments of fd5_draw().
+ *
+ * Indexed draws take their index buffer, index size and start offset from
+ * the context's bound index buffer and fetch indices via DMA; non-indexed
+ * draws use auto-generated indices and pass no index buffer.
+ */
+static inline void
+fd5_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
+               enum pc_di_primtype primtype,
+               enum pc_di_vis_cull_mode vismode,
+               const struct pipe_draw_info *info)
+{
+       /* defaults describe the non-indexed case */
+       struct pipe_resource *idx_buffer = NULL;
+       enum a4xx_index_size idx_type = INDEX4_SIZE_32_BIT;
+       enum pc_di_src_sel src_sel = DI_SRC_SEL_AUTO_INDEX;
+       uint32_t idx_size = 0;
+       uint32_t idx_offset = 0;
+
+       if (info->indexed) {
+               struct pipe_index_buffer *ib = &batch->ctx->indexbuf;
+
+               /* user-pointer index buffers are not expected here */
+               assert(!ib->user_buffer);
+
+               src_sel = DI_SRC_SEL_DMA;
+               idx_buffer = ib->buffer;
+               idx_type = fd4_size2indextype(ib->index_size);
+               idx_offset = ib->offset + (info->start * ib->index_size);
+               idx_size = ib->index_size * info->count;
+       }
+
+       fd5_draw(batch, ring, primtype, vismode, src_sel,
+                       info->count, info->instance_count,
+                       idx_type, idx_size, idx_offset, idx_buffer);
+}
+
+#endif /* FD5_DRAW_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
new file mode 100644 (file)
index 0000000..edb1f4f
--- /dev/null
@@ -0,0 +1,729 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_helpers.h"
+#include "util/u_format.h"
+#include "util/u_viewport.h"
+
+#include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
+
+#include "fd5_emit.h"
+#include "fd5_blend.h"
+#include "fd5_context.h"
+#include "fd5_program.h"
+#include "fd5_rasterizer.h"
+#include "fd5_texture.h"
+#include "fd5_format.h"
+#include "fd5_zsa.h"
+
+/* Maps a shader stage (enum shader_t) to the state block passed to
+ * CP_LOAD_STATE when loading that stage's constants.  Only the vertex and
+ * fragment stages have entries.
+ */
+static const enum adreno_state_block sb[] = {
+       [SHADER_VERTEX]   = SB_VERT_SHADER,
+       [SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
+/* Upload shader constants for the given stage with a CP_LOAD_STATE packet.
+ *
+ * regid:          base const register (asserted to be a multiple of 4)
+ * offset:         offset handed to the reloc when prsc is set, otherwise a
+ *                 byte offset applied to dwords
+ * sizedwords:     size of const value buffer (asserted multiple of 4)
+ * prsc or dwords: buffer containing constant values; with prsc the packet
+ *                 only references the resource's bo, otherwise the values
+ *                 are copied inline into the packet
+ */
+static void
+fd5_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
+               uint32_t regid, uint32_t offset, uint32_t sizedwords,
+               const uint32_t *dwords, struct pipe_resource *prsc)
+{
+       /* values are only carried inline when there is no resource */
+       const uint32_t inline_dwords = prsc ? 0 : sizedwords;
+       enum adreno_state_src src;
+
+       debug_assert((regid % 4) == 0);
+       debug_assert((sizedwords % 4) == 0);
+
+       if (prsc)
+               src = 0x2;  // TODO ??
+       else
+               src = SS_DIRECT;
+
+       OUT_PKT7(ring, CP_LOAD_STATE, 3 + inline_dwords);
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+                       CP_LOAD_STATE_0_STATE_SRC(src) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+                       CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
+
+       if (prsc) {
+               /* indirect: the reloc supplies the source address dwords */
+               OUT_RELOC(ring, fd_resource(prsc)->bo, offset,
+                               CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
+               return;
+       }
+
+       /* direct: no external address, payload follows the header */
+       OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+                       CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+       OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0));
+
+       const uint32_t *values =
+                       (const uint32_t *)((const uint8_t *)dwords + offset);
+       for (uint32_t n = 0; n < inline_dwords; n++)
+               OUT_RING(ring, values[n]);
+}
+
+/* Emit a CP_LOAD_STATE packet whose inline payload is a table of buffer
+ * addresses, one entry per element of prscs/offsets.
+ *
+ * regid:   base const register (asserted to be a multiple of 4)
+ * num:     number of entries in prscs/offsets (asserted multiple of 4)
+ * write:   selects OUT_RELOCW instead of OUT_RELOC for each entry
+ * prscs:   resources to reference; NULL entries get a 0xbad00000 marker
+ * offsets: per-entry offset passed to the reloc
+ *
+ * NOTE(review): the packet is sized as 3 + num dwords, but other packets
+ * in this file budget two dwords for each OUT_RELOC (e.g. VFD_FETCH), so
+ * the payload may be larger than declared when entries are non-NULL --
+ * confirm the size and NUM_UNIT against the constant layout the shader
+ * compiler expects.
+ */
+static void
+fd5_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+               uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
+{
+       uint32_t i;
+
+       debug_assert((regid % 4) == 0);
+       debug_assert((num % 4) == 0);
+
+       OUT_PKT7(ring, CP_LOAD_STATE, 3 + num);
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+                       CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+                       CP_LOAD_STATE_0_NUM_UNIT(num/4));
+       OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+                       CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+       OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0));
+
+       for (i = 0; i < num; i++) {
+               if (prscs[i]) {
+                       if (write) {
+                               OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
+                       } else {
+                               OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
+                       }
+               } else {
+                       /* unbound slot: emit a recognizable marker tagged with the index */
+                       OUT_RING(ring, 0xbad00000 | (i << 16));
+               }
+       }
+}
+
+/* Emit sampler and texture state for one state block.
+ *
+ * Two independent CP_LOAD_STATE packets are written: one carrying four
+ * dwords per sampler, one carrying twelve dwords per texture view.  Either
+ * packet is omitted when the corresponding count in tex is zero.  Unbound
+ * slots are filled from zero-initialized dummy state objects.
+ *
+ * Note that the sb parameter shadows the file-level sb[] table inside this
+ * function.
+ */
+static void
+emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               enum adreno_state_block sb, struct fd_texture_stateobj *tex)
+{
+       unsigned i;
+
+       if (tex->num_samplers > 0) {
+               /* output sampler state: */
+               OUT_PKT7(ring, CP_LOAD_STATE, 3 + (4 * tex->num_samplers));
+               OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+                               CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                               CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                               CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
+               OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+                               CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+               OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0));
+               for (i = 0; i < tex->num_samplers; i++) {
+                       /* all-zero sampler used for slots with nothing bound */
+                       static const struct fd5_sampler_stateobj dummy_sampler = {};
+                       const struct fd5_sampler_stateobj *sampler = tex->samplers[i] ?
+                                       fd5_sampler_stateobj(tex->samplers[i]) :
+                                       &dummy_sampler;
+                       OUT_RING(ring, sampler->texsamp0);
+                       OUT_RING(ring, sampler->texsamp1);
+                       OUT_RING(ring, sampler->texsamp2);
+                       OUT_RING(ring, sampler->texsamp3);
+               }
+       }
+
+       if (tex->num_textures > 0) {
+               unsigned num_textures = tex->num_textures;
+
+               /* emit texture state: */
+               OUT_PKT7(ring, CP_LOAD_STATE, 3 + (12 * num_textures));
+               OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+                               CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                               CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                               CP_LOAD_STATE_0_NUM_UNIT(num_textures));
+               OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+                               CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+               OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0));
+               for (i = 0; i < tex->num_textures; i++) {
+                       /* all-zero view used for slots with nothing bound */
+                       static const struct fd5_pipe_sampler_view dummy_view = {};
+                       const struct fd5_pipe_sampler_view *view = tex->textures[i] ?
+                                       fd5_pipe_sampler_view(tex->textures[i]) :
+                                       &dummy_view;
+
+                       OUT_RING(ring, view->texconst0);
+                       OUT_RING(ring, view->texconst1);
+                       OUT_RING(ring, view->texconst2);
+                       OUT_RING(ring, view->texconst3);
+                       if (view->base.texture) {
+                               /* the reloc fills two dwords here; texconst5 is
+                                * OR'd into the upper 32 bits
+                                */
+                               struct fd_resource *rsc = fd_resource(view->base.texture);
+                               OUT_RELOC(ring, rsc->bo, view->offset,
+                                               (uint64_t)view->texconst5 << 32, 0);
+                       } else {
+                               /* no backing texture: zero address, texconst5 as-is */
+                               OUT_RING(ring, 0x00000000);
+                               OUT_RING(ring, view->texconst5);
+                       }
+                       OUT_RING(ring, view->texconst6);
+                       OUT_RING(ring, view->texconst7);
+                       OUT_RING(ring, view->texconst8);
+                       OUT_RING(ring, view->texconst9);
+                       OUT_RING(ring, view->texconst10);
+                       OUT_RING(ring, view->texconst11);
+               }
+       }
+}
+
+/* Emit vertex fetch state (VFD_FETCH / VFD_DECODE / VFD_DEST_CNTL) for each
+ * vertex shader input that is not a system value and has a non-zero
+ * component mask, then write the number of fetch slots that were programmed
+ * to VFD_CONTROL_0.
+ *
+ * i indexes the shader inputs and the matching vertex elements; j counts
+ * the fetch slots actually emitted.
+ */
+void
+fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
+{
+       int32_t i, j;
+       const struct fd_vertex_state *vtx = emit->vtx;
+       const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
+
+       /* only entries [0, inputs_count) are populated; do not look at the
+        * slot one past the last input
+        */
+       for (i = 0, j = 0; i < vp->inputs_count; i++) {
+               if (vp->inputs[i].sysval)
+                       continue;
+               if (vp->inputs[i].compmask) {
+                       struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
+                       const struct pipe_vertex_buffer *vb =
+                                       &vtx->vertexbuf.vb[elem->vertex_buffer_index];
+                       struct fd_resource *rsc = fd_resource(vb->buffer);
+                       enum pipe_format pfmt = elem->src_format;
+                       enum a5xx_vtx_fmt fmt = fd5_pipe2vtx(pfmt);
+                       uint32_t off = vb->buffer_offset + elem->src_offset;
+                       /* bytes remaining in the bo from the fetch start */
+                       uint32_t size = fd_bo_size(rsc->bo) - off;
+                       debug_assert(fmt != ~0);
+
+                       OUT_PKT4(ring, REG_A5XX_VFD_FETCH(j), 4);
+                       OUT_RELOC(ring, rsc->bo, off, 0, 0);
+                       OUT_RING(ring, size);           /* VFD_FETCH[j].SIZE */
+                       OUT_RING(ring, vb->stride);     /* VFD_FETCH[j].STRIDE */
+
+                       OUT_PKT4(ring, REG_A5XX_VFD_DECODE(j), 2);
+                       OUT_RING(ring, A5XX_VFD_DECODE_INSTR_IDX(j) |
+                                       A5XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+                                       A5XX_VFD_DECODE_INSTR_SWAP(fd5_pipe2swap(pfmt)));
+                       OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */
+
+                       OUT_PKT4(ring, REG_A5XX_VFD_DEST_CNTL(j), 1);
+                       OUT_RING(ring, A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) |
+                                       A5XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid));
+
+                       j++;
+               }
+       }
+
+       OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_0, 1);
+       OUT_RING(ring, A5XX_VFD_CONTROL_0_VTXCNT(j));
+}
+
+/* Emit the pieces of a5xx draw state selected by emit->dirty into ring.
+ *
+ * Each section below is guarded by the dirty bits it depends on.  At the
+ * end, the bits that were handled are cleared from ctx->dirty; the vertex
+ * and fragment texture bits are left set when the corresponding shader
+ * does not sample, since no texture state was emitted for them.
+ */
+void
+fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               struct fd5_emit *emit)
+{
+       const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
+       const struct ir3_shader_variant *fp = fd5_emit_get_fp(emit);
+       uint32_t dirty = emit->dirty;
+
+       emit_marker5(ring, 5);
+
+       if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) {
+               struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+               unsigned char mrt_comp[A5XX_MAX_RENDER_TARGETS] = {0};
+
+               /* enable all four components for every bound color buffer */
+               for (unsigned i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
+                       mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
+               }
+
+               OUT_PKT4(ring, REG_A5XX_RB_RENDER_COMPONENTS, 1);
+               OUT_RING(ring, A5XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+                               A5XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+                               A5XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+                               A5XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+                               A5XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+                               A5XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+                               A5XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+                               A5XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+       }
+
+       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
+               struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
+               struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+               uint32_t rb_alpha_control = zsa->rb_alpha_control;
+
+               /* alpha test is masked off for pure-integer color buffer 0 */
+               if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
+                       rb_alpha_control &= ~A5XX_RB_ALPHA_CONTROL_ALPHA_TEST;
+
+               OUT_PKT4(ring, REG_A5XX_RB_ALPHA_CONTROL, 1);
+               OUT_RING(ring, rb_alpha_control);
+
+               OUT_PKT4(ring, REG_A5XX_RB_STENCIL_CONTROL, 1);
+               OUT_RING(ring, zsa->rb_stencil_control);
+       }
+
+       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
+               struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
+               struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+               OUT_PKT4(ring, REG_A5XX_RB_STENCILREFMASK, 1);
+               OUT_RING(ring, zsa->rb_stencilrefmask |
+                               A5XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+
+               OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL, 1);
+               OUT_RING(ring, zsa->gras_su_depth_plane_cntl);
+       }
+
+       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+               struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
+               /* set when the fragment shader discards or writes position */
+               bool fragz = fp->has_kill | fp->writes_pos;
+
+               OUT_PKT4(ring, REG_A5XX_RB_DEPTH_CNTL, 1);
+               OUT_RING(ring, zsa->rb_depth_cntl);
+
+               OUT_PKT4(ring, REG_A5XX_RB_DEPTH_PLANE_CNTL, 1);
+               OUT_RING(ring, COND(fragz, A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z));
+       }
+
+       if (dirty & FD_DIRTY_RASTERIZER) {
+               struct fd5_rasterizer_stateobj *rasterizer =
+                               fd5_rasterizer_stateobj(ctx->rasterizer);
+
+               OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
+               OUT_RING(ring, rasterizer->gras_su_cntl);
+
+               OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2);
+               OUT_RING(ring, rasterizer->gras_su_point_minmax);
+               OUT_RING(ring, rasterizer->gras_su_point_size);
+
+               OUT_PKT4(ring, REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
+               OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
+               OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
+               OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp);
+       }
+
+       /* NOTE: since primitive_restart is not actually part of any
+        * state object, we need to make sure that we always emit
+        * PRIM_VTX_CNTL.. either that or be more clever and detect
+        * when it changes.
+        */
+       if (emit->info) {
+               struct fd5_rasterizer_stateobj *rast =
+                       fd5_rasterizer_stateobj(ctx->rasterizer);
+               uint32_t val = rast->pc_prim_vtx_cntl;
+
+               val |= COND(vp->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE);
+
+               OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1);
+               OUT_RING(ring, val);
+       }
+
+       if (dirty & FD_DIRTY_SCISSOR) {
+               struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+
+               /* second dword of each pair is written with max - 1 */
+               OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
+               OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->minx) |
+                               A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->miny));
+               OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->maxx - 1) |
+                               A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->maxy - 1));
+
+               OUT_PKT4(ring, REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
+               OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) |
+                               A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny));
+               OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) |
+                               A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1));
+
+               /* grow the batch's bounding scissor to include this one */
+               ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
+               ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
+               ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
+               ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
+       }
+
+       if (dirty & FD_DIRTY_VIEWPORT) {
+               fd_wfi(ctx->batch, ring);
+               OUT_PKT4(ring, REG_A5XX_GRAS_CL_VPORT_XOFFSET_0, 6);
+               OUT_RING(ring, A5XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
+               OUT_RING(ring, A5XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
+               OUT_RING(ring, A5XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
+               OUT_RING(ring, A5XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
+               OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
+               OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
+       }
+
+       if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
+               struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+               unsigned n = pfb->nr_cbufs;
+               /* if we have depth/stencil, we need at least one MRT: */
+               if (pfb->zsbuf)
+                       n = MAX2(1, n);
+               fd5_program_emit(ring, emit, n, pfb->cbufs);
+       }
+
+       if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+               ir3_emit_consts(vp, ring, ctx, emit->info, dirty);
+               if (!emit->key.binning_pass)
+                       ir3_emit_consts(fp, ring, ctx, emit->info, dirty);
+       }
+
+       if ((dirty & FD_DIRTY_BLEND)) {
+               struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
+               uint32_t i;
+
+               for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
+                       enum pipe_format format = pipe_surface_format(
+                                       ctx->batch->framebuffer.cbufs[i]);
+                       bool is_int = util_format_is_pure_integer(format);
+                       bool has_alpha = util_format_has_alpha(format);
+                       uint32_t control = blend->rb_mrt[i].control;
+                       uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
+
+                       /* pure-integer targets keep only the component-enable bits */
+                       if (is_int) {
+                               control &= A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+//                             control |= A5XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
+                       }
+
+                       /* formats without alpha use the alternate rgb blend
+                        * setup and have BLEND2 cleared
+                        */
+                       if (has_alpha) {
+                               blend_control |= blend->rb_mrt[i].blend_control_rgb;
+                       } else {
+                               blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb;
+                               control &= ~A5XX_RB_MRT_CONTROL_BLEND2;
+                       }
+
+                       OUT_PKT4(ring, REG_A5XX_RB_MRT_CONTROL(i), 1);
+                       OUT_RING(ring, control);
+
+                       OUT_PKT4(ring, REG_A5XX_RB_MRT_BLEND_CONTROL(i), 1);
+                       OUT_RING(ring, blend_control);
+               }
+
+               OUT_PKT4(ring, REG_A5XX_RB_BLEND_CNTL, 1);
+               OUT_RING(ring, blend->rb_blend_cntl |
+                               A5XX_RB_BLEND_CNTL_SAMPLE_MASK(0xffff));
+
+               OUT_PKT4(ring, REG_A5XX_SP_BLEND_CNTL, 1);
+               OUT_RING(ring, 0x00000100);
+       }
+
+       if (dirty & FD_DIRTY_BLEND_COLOR) {
+               struct pipe_blend_color *bcolor = &ctx->blend_color;
+
+               /* per channel: one packed dword followed by one F32 dword */
+               OUT_PKT4(ring, REG_A5XX_RB_BLEND_RED, 8);
+               OUT_RING(ring, A5XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) |
+                               A5XX_RB_BLEND_RED_UINT(bcolor->color[0] * 0xff) |
+                               A5XX_RB_BLEND_RED_SINT(bcolor->color[0] * 0x7f));
+               OUT_RING(ring, A5XX_RB_BLEND_RED_F32(bcolor->color[0]));
+               OUT_RING(ring, A5XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) |
+                               A5XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 0xff) |
+                               A5XX_RB_BLEND_GREEN_SINT(bcolor->color[1] * 0x7f));
+               OUT_RING(ring, A5XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
+               OUT_RING(ring, A5XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) |
+                               A5XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 0xff) |
+                               A5XX_RB_BLEND_BLUE_SINT(bcolor->color[2] * 0x7f));
+               OUT_RING(ring, A5XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
+               OUT_RING(ring, A5XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) |
+                               A5XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 0xff) |
+                               A5XX_RB_BLEND_ALPHA_SINT(bcolor->color[3] * 0x7f));
+               OUT_RING(ring, A5XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
+       }
+
+       if (dirty & FD_DIRTY_VERTTEX) {
+               if (vp->has_samp) {
+                       emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex);
+                       OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1);
+                       OUT_RING(ring, ctx->verttex.num_textures);
+               } else {
+                       /* nothing emitted: keep the bit dirty in ctx */
+                       dirty &= ~FD_DIRTY_VERTTEX;
+               }
+       }
+
+       if (dirty & FD_DIRTY_FRAGTEX) {
+               if (fp->has_samp) {
+                       emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex);
+                       OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
+                       OUT_RING(ring, ctx->fragtex.num_textures);
+               } else {
+                       /* nothing emitted: keep the bit dirty in ctx */
+                       dirty &= ~FD_DIRTY_FRAGTEX;
+               }
+       }
+
+       ctx->dirty &= ~dirty;
+}
+
+/* emit setup at begin of new cmdstream buffer (don't rely on previous
+ * state, there could have been a context switch between ioctls):
+ */
+void
+fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
+{
+       struct fd_context *ctx = batch->ctx;
+
+       fd5_set_render_mode(ctx, ring, BYPASS);
+       fd5_cache_flush(batch, ring);
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
+       OUT_RING(ring, 0xfffff);
+
+/*
+t7              opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
+0000000500024048:               70d08003 00000000 001c5000 00000005
+t7              opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
+0000000500024058:               70d08003 00000010 001c7000 00000005
+
+t7              opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
+0000000500024068:               70268000
+*/
+
+       OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1);
+       OUT_RING(ring, 0xffffffff);
+
+       OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1);
+       OUT_RING(ring, 0x00000012);
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2);
+       OUT_RING(ring, A5XX_GRAS_SU_POINT_MINMAX_MIN(1.0) |
+                       A5XX_GRAS_SU_POINT_MINMAX_MAX(4092.0));
+       OUT_RING(ring, A5XX_GRAS_SU_POINT_SIZE(0.5));
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
+       OUT_RING(ring, 0x00000000);   /* GRAS_SU_CONSERVATIVE_RAS_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL, 1);
+       OUT_RING(ring, 0x00000000);   /* GRAS_SC_SCREEN_SCISSOR_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG_MAX_CONST, 1);
+       OUT_RING(ring, 0);            /* SP_VS_CONFIG_MAX_CONST */
+
+       OUT_PKT4(ring, REG_A5XX_SP_FS_CONFIG_MAX_CONST, 1);
+       OUT_RING(ring, 0);            /* SP_FS_CONFIG_MAX_CONST */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E292, 2);
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E292 */
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E293 */
+
+       OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1);
+       OUT_RING(ring, 0x00000044);   /* RB_MODE_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_RB_DBG_ECO_CNTL, 1);
+       OUT_RING(ring, 0x00100000);   /* RB_DBG_ECO_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_VFD_MODE_CNTL, 1);
+       OUT_RING(ring, 0x00000000);   /* VFD_MODE_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_PC_MODE_CNTL, 1);
+       OUT_RING(ring, 0x0000001f);   /* PC_MODE_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
+       OUT_RING(ring, 0x0000001e);   /* SP_MODE_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
+       OUT_RING(ring, 0x40000800);   /* SP_DBG_ECO_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
+       OUT_RING(ring, 0x00000544);   /* TPL1_MODE_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0, 2);
+       OUT_RING(ring, 0x00000080);   /* HLSQ_TIMEOUT_THRESHOLD_0 */
+       OUT_RING(ring, 0x00000000);   /* HLSQ_TIMEOUT_THRESHOLD_1 */
+
+       OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1);
+       OUT_RING(ring, 0x00000400);   /* VPC_DBG_ECO_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1);
+       OUT_RING(ring, 0x00000001);   /* HLSQ_MODE_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
+       OUT_RING(ring, 0x00000000);   /* VPC_MODE_CNTL */
+
+       /* we don't use this yet.. probably best to disable.. */
+       OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+       OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
+                       CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+                       CP_SET_DRAW_STATE__0_GROUP_ID(0));
+       OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+       OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
+
+       /* other regs not used (yet?) and always seem to have same value: */
+       OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
+       OUT_RING(ring, 0x00000080);   /* GRAS_CL_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
+       OUT_RING(ring, 0x00000000);   /* GRAS_SU_CONSERVATIVE_RAS_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1);
+       OUT_RING(ring, 0x00000000);   /* GRAS_SC_BIN_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1);
+       OUT_RING(ring, 0x00000000);   /* GRAS_SC_BIN_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_VPC_FS_PRIMITIVEID_CNTL, 1);
+       OUT_RING(ring, 0x000000ff);   /* VPC_FS_PRIMITIVEID_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
+       OUT_RING(ring, 0x00000001);   /* VPC_SO_OVERRIDE */
+
+       OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO_0, 3);
+       OUT_RING(ring, 0x00000000);   /* VPC_SO_BUFFER_BASE_LO_0 */
+       OUT_RING(ring, 0x00000000);   /* VPC_SO_BUFFER_BASE_HI_0 */
+       OUT_RING(ring, 0x00000000);   /* VPC_SO_BUFFER_SIZE_0 */
+
+       OUT_PKT4(ring, REG_A5XX_VPC_SO_FLUSH_BASE_LO_0, 2);
+       OUT_RING(ring, 0x00000000);   /* VPC_SO_FLUSH_BASE_LO_0 */
+       OUT_RING(ring, 0x00000000);   /* VPC_SO_FLUSH_BASE_HI_0 */
+
+       OUT_PKT4(ring, REG_A5XX_PC_GS_PARAM, 1);
+       OUT_RING(ring, 0x00000000);   /* PC_GS_PARAM */
+
+       OUT_PKT4(ring, REG_A5XX_PC_HS_PARAM, 1);
+       OUT_RING(ring, 0x00000000);   /* PC_HS_PARAM */
+
+       OUT_PKT4(ring, REG_A5XX_TPL1_TP_FS_ROTATION_CNTL, 1);
+       OUT_RING(ring, 0x00000000);   /* TPL1_TP_FS_ROTATION_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E001, 1);
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E001 */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E004, 1);
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E004 */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E093, 1);
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E093 */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E1C7, 1);
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E1C7 */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E29A, 1);
+       OUT_RING(ring, 0x00ffff00);   /* UNKNOWN_E29A */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2A1, 1);
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E2A1 */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2AB, 1);
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E2AB */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E389, 1);
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E389 */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E38D, 1);
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E38D */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5AB, 1);
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E5AB */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5C2, 1);
+       OUT_RING(ring, 0x00000000);   /* UNKNOWN_E5C2 */
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2AE, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2B2, 6);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2B9, 6);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2C0, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5DB, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E600, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E640, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 4);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 2);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C0, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C5, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CA, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CF, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D4, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D9, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       // TODO hacks.. these should not be hardcoded:
+       OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
+       OUT_RING(ring, 0x00000008);   /* GRAS_SC_CNTL */
+
+       fd_hw_query_enable(batch, ring);
+}
+
+/* a5xx implementation of the context's emit_ib hook (installed by
+ * fd5_emit_init()): hands both ringbuffers straight to __OUT_IB5().
+ */
+static void
+fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
+{
+       __OUT_IB5(ring, target);
+}
+
+/* Hook the a5xx emit_const, emit_const_bo and emit_ib implementations
+ * into the freedreno context that wraps pctx.
+ */
+void
+fd5_emit_init(struct pipe_context *pctx)
+{
+       struct fd_context *fd_ctx = fd_context(pctx);
+
+       fd_ctx->emit_ib = fd5_emit_ib;
+       fd_ctx->emit_const_bo = fd5_emit_const_bo;
+       fd_ctx->emit_const = fd5_emit_const;
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.h b/src/gallium/drivers/freedreno/a5xx/fd5_emit.h
new file mode 100644 (file)
index 0000000..2c6b717
--- /dev/null
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_EMIT_H
+#define FD5_EMIT_H
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+#include "fd5_context.h"
+#include "fd5_format.h"
+#include "fd5_program.h"
+#include "ir3_shader.h"
+
+struct fd_ringbuffer;
+
+/* grouped together emit-state for prog/vertex/state emit: */
+struct fd5_emit {
+       /* forwarded to ir3_shader_variant() when a variant is looked up: */
+       struct pipe_debug_callback *debug;
+       const struct fd_vertex_state *vtx;
+       /* supplies the vp/fp shader state objects for the variant lookups: */
+       const struct fd_program_stateobj *prog;
+       const struct pipe_draw_info *info;
+       /* variant key passed to ir3_shader_variant(); key.binning_pass makes
+        * fd5_emit_get_fp() return a dummy variant instead:
+        */
+       struct ir3_shader_key key;
+       uint32_t dirty;
+
+       uint32_t sprite_coord_enable;  /* bitmask */
+       bool sprite_coord_mode;
+       bool rasterflat;
+       bool no_decode_srgb;
+
+       /* cached to avoid repeated lookups of same variants (filled in on
+        * first use by fd5_emit_get_vp() / fd5_emit_get_fp()):
+        */
+       const struct ir3_shader_variant *vp, *fp;
+       /* TODO: other shader stages.. */
+};
+
+/* Render-target color format for surf; a NULL surface yields 0. */
+static inline enum a5xx_color_fmt fd5_emit_format(struct pipe_surface *surf)
+{
+       return surf ? fd5_pipe2color(surf->format) : 0;
+}
+
+/* Return the vertex shader variant for this emit state, looking it up
+ * with ir3_shader_variant() on first use and caching it in emit->vp.
+ */
+static inline const struct ir3_shader_variant *
+fd5_emit_get_vp(struct fd5_emit *emit)
+{
+       struct fd5_shader_stateobj *vs;
+
+       /* already resolved for this emit: */
+       if (emit->vp)
+               return emit->vp;
+
+       vs = emit->prog->vp;
+       emit->vp = ir3_shader_variant(vs->shader, emit->key, emit->debug);
+       return emit->vp;
+}
+
+/* Return the fragment shader variant for this emit state, caching it in
+ * emit->fp.  When key.binning_pass is set an empty dummy variant is used
+ * instead of looking up a real one.
+ */
+static inline const struct ir3_shader_variant *
+fd5_emit_get_fp(struct fd5_emit *emit)
+{
+       /* use dummy stateobj to simplify binning vs non-binning: */
+       static const struct ir3_shader_variant binning_fp = {};
+
+       /* already resolved for this emit: */
+       if (emit->fp)
+               return emit->fp;
+
+       if (emit->key.binning_pass) {
+               emit->fp = &binning_fp;
+       } else {
+               struct fd5_shader_stateobj *fs = emit->prog->fp;
+               emit->fp = ir3_shader_variant(fs->shader, emit->key, emit->debug);
+       }
+
+       return emit->fp;
+}
+
+/* Emit a write of the five consecutive UCHE_CACHE_INVALIDATE_* registers
+ * (min/max address all zero, control word 0x12), bracketed by
+ * fd_reset_wfi() before and fd_wfi() after.
+ * NOTE(review): the meaning of the 0x12 control value is not visible
+ * here -- confirm against the register database.
+ */
+static inline void
+fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
+{
+       fd_reset_wfi(batch);
+       /* one packet covering 5 registers starting at ..._MIN_LO: */
+       OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5);
+       OUT_RING(ring, 0x00000000);   /* UCHE_CACHE_INVALIDATE_MIN_LO */
+       OUT_RING(ring, 0x00000000);   /* UCHE_CACHE_INVALIDATE_MIN_HI */
+       OUT_RING(ring, 0x00000000);   /* UCHE_CACHE_INVALIDATE_MAX_LO */
+       OUT_RING(ring, 0x00000000);   /* UCHE_CACHE_INVALIDATE_MAX_HI */
+       OUT_RING(ring, 0x00000012);   /* UCHE_CACHE_INVALIDATE */
+       fd_wfi(batch, ring);
+}
+
+/* Emit a 5-dword CP_SET_RENDER_MODE packet selecting `mode`, with an
+ * emit_marker5(ring, 7) before and after it.  The address dwords are
+ * written as zero and the GMEM_ENABLE bit is set only for mode == GMEM.
+ * The ctx parameter is currently unused.
+ */
+static inline void
+fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               enum render_mode_cmd mode)
+{
+       /* TODO add preemption support, gmem bypass, etc */
+       emit_marker5(ring, 7);
+       OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
+       OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode));
+       OUT_RING(ring, 0x00000000);   /* ADDR_LO */
+       OUT_RING(ring, 0x00000000);   /* ADDR_HI */
+       OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE));
+       OUT_RING(ring, 0x00000000);
+       emit_marker5(ring, 7);
+}
+
+/* Emit a CP_EVENT_WRITE packet carrying the BLIT event, with an
+ * emit_marker5(ring, 7) before and after it.  The packet's address
+ * dwords are a write relocation to the context's blit_mem buffer.
+ */
+static inline void
+fd5_emit_blit(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+       struct fd5_context *fd5_ctx = fd5_context(ctx);
+
+       emit_marker5(ring, 7);
+
+       /* 4 payload dwords: event, ADDR_LO/HI (from the reloc), trailing 0 */
+       OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+       OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(BLIT));
+       OUT_RELOCW(ring, fd5_ctx->blit_mem, 0, 0, 0);  /* ADDR_LO/HI */
+       OUT_RING(ring, 0x00000000);
+
+       emit_marker5(ring, 7);
+}
+
+/* Write RB_RENDER_CNTL into the current batch's draw ringbuffer.  The
+ * value is 0 for a blit and has bit 0x8 set otherwise.
+ * NOTE(review): the meaning of bit 0x8 is not visible here -- confirm
+ * against the register database.
+ */
+static inline void
+fd5_emit_render_cntl(struct fd_context *ctx, bool blit)
+{
+       struct fd_ringbuffer *ring = ctx->batch->draw;
+
+       /* TODO eventually this partially depends on the pfb state, ie.
+        * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part
+        * we could probably cache and just regenerate if framebuffer
+        * state is dirty (or something like that)..
+        *
+        * Other bits seem to depend on query state, like if samples-passed
+        * query is active.
+        */
+       OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
+       OUT_RING(ring, 0x00000000 |   /* RB_RENDER_CNTL */
+                       COND(!blit, 0x8));
+}
+
+/* Emit entry points that are only declared in this header; their
+ * definitions live outside it.
+ */
+void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit);
+
+void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               struct fd5_emit *emit);
+
+void fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);
+
+/* Installs the a5xx emit_const / emit_const_bo / emit_ib context hooks. */
+void fd5_emit_init(struct pipe_context *pctx);
+
+#endif /* FD5_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_format.c b/src/gallium/drivers/freedreno/a5xx/fd5_format.c
new file mode 100644 (file)
index 0000000..0e22839
--- /dev/null
@@ -0,0 +1,445 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_format.h"
+
+#include "fd5_format.h"
+
+
+/* Specifies the table of all the formats and their features. Also supplies
+ * the helpers that look up various data in those tables.
+ */
+
+/* One row of the format table: the hardware encodings for a single
+ * pipe_format.  Fields that do not apply to a row are stored as ~0 by the
+ * VT/_T/V_ helper macros.
+ */
+struct fd5_format {
+       enum a5xx_vtx_fmt vtx;      /* vertex fetch format */
+       enum a5xx_tex_fmt tex;      /* texture sampler format */
+       enum a5xx_color_fmt rb;     /* render-target (MRT) format */
+       enum a3xx_color_swap swap;  /* component swap */
+       boolean present;            /* set to 1 for rows that are populated */
+};
+
+/* Lets the macros below paste "RB5_ ## NONE" for rows that have no
+ * render-target format; it expands to the same ~0 used for missing
+ * vtx/tex formats.
+ */
+#define RB5_NONE ~0
+
+/* Each macro expands to a designated initializer for the table slot
+ * [PIPE_FORMAT_<pipe>], marks the row present, and pastes <fmt>/<rbfmt>
+ * onto the VFMT5_/TFMT5_/RB5_ enum prefixes.
+ */
+
+/* vertex + texture */
+#define VT(pipe, fmt, rbfmt, swapfmt) \
+       [PIPE_FORMAT_ ## pipe] = { \
+               .present = 1, \
+               .vtx = VFMT5_ ## fmt, \
+               .tex = TFMT5_ ## fmt, \
+               .rb = RB5_ ## rbfmt, \
+               .swap = swapfmt \
+       }
+
+/* texture-only */
+#define _T(pipe, fmt, rbfmt, swapfmt) \
+       [PIPE_FORMAT_ ## pipe] = { \
+               .present = 1, \
+               .vtx = ~0, \
+               .tex = TFMT5_ ## fmt, \
+               .rb = RB5_ ## rbfmt, \
+               .swap = swapfmt \
+       }
+
+/* vertex-only */
+#define V_(pipe, fmt, rbfmt, swapfmt) \
+       [PIPE_FORMAT_ ## pipe] = { \
+               .present = 1, \
+               .vtx = VFMT5_ ## fmt, \
+               .tex = ~0, \
+               .rb = RB5_ ## rbfmt, \
+               .swap = swapfmt \
+       }
+
+/* Format lookup table, indexed by enum pipe_format.  Slots without a row
+ * here are zero-initialized, so .present is 0 and the fd5_pipe2*() helpers
+ * return ~0 for them (WZYX for the swap lookup).  Rows that are commented
+ * out are not enabled.
+ *
+ * NOTE(review): R8_SSCALED, R16_SSCALED and R32_SSCALED map to *_UINT
+ * vertex formats, whereas every multi-component SSCALED row maps to
+ * *_SINT -- looks like a copy/paste slip; confirm that the matching
+ * VFMT5_*_SINT enums exist before changing.
+ */
+static struct fd5_format formats[PIPE_FORMAT_COUNT] = {
+       /* 8-bit */
+       VT(R8_UNORM,   8_UNORM, R8_UNORM, WZYX),
+//     VT(R8_SNORM,   8_SNORM, R8_SNORM, WZYX),
+//     VT(R8_UINT,    8_UINT,  R8_UINT,  WZYX),
+//     VT(R8_SINT,    8_SINT,  R8_SINT,  WZYX),
+       V_(R8_USCALED, 8_UINT,  NONE,     WZYX),
+       V_(R8_SSCALED, 8_UINT,  NONE,     WZYX),
+
+//     _T(A8_UNORM,   8_UNORM, A8_UNORM, WZYX),
+//     _T(L8_UNORM,   8_UNORM, R8_UNORM, WZYX),
+       _T(I8_UNORM,   8_UNORM, NONE,     WZYX),
+
+//     _T(A8_UINT,    8_UINT,  NONE,     WZYX),
+//     _T(A8_SINT,    8_SINT,  NONE,     WZYX),
+//     _T(L8_UINT,    8_UINT,  NONE,     WZYX),
+//     _T(L8_SINT,    8_SINT,  NONE,     WZYX),
+//     _T(I8_UINT,    8_UINT,  NONE,     WZYX),
+//     _T(I8_SINT,    8_SINT,  NONE,     WZYX),
+
+//     _T(S8_UINT,    8_UINT,  R8_UNORM, WZYX),
+
+       /* 16-bit */
+//     VT(R16_UNORM,   16_UNORM, R16_UNORM, WZYX),
+//     VT(R16_SNORM,   16_SNORM, R16_SNORM, WZYX),
+//     VT(R16_UINT,    16_UINT,  R16_UINT,  WZYX),
+//     VT(R16_SINT,    16_SINT,  R16_SINT,  WZYX),
+       V_(R16_USCALED, 16_UINT,  NONE,      WZYX),
+       V_(R16_SSCALED, 16_UINT,  NONE,      WZYX),
+       VT(R16_FLOAT,   16_FLOAT, R16_FLOAT, WZYX),
+
+//     _T(A16_UNORM,   16_UNORM, NONE,      WZYX),
+//     _T(A16_SNORM,   16_SNORM, NONE,      WZYX),
+//     _T(A16_UINT,    16_UINT,  NONE,      WZYX),
+//     _T(A16_SINT,    16_SINT,  NONE,      WZYX),
+//     _T(L16_UNORM,   16_UNORM, NONE,      WZYX),
+//     _T(L16_SNORM,   16_SNORM, NONE,      WZYX),
+//     _T(L16_UINT,    16_UINT,  NONE,      WZYX),
+//     _T(L16_SINT,    16_SINT,  NONE,      WZYX),
+//     _T(I16_UNORM,   16_UNORM, NONE,      WZYX),
+//     _T(I16_SNORM,   16_SNORM, NONE,      WZYX),
+//     _T(I16_UINT,    16_UINT,  NONE,      WZYX),
+//     _T(I16_SINT,    16_SINT,  NONE,      WZYX),
+
+//     VT(R8G8_UNORM,   8_8_UNORM, R8G8_UNORM, WZYX),
+//     VT(R8G8_SNORM,   8_8_SNORM, R8G8_SNORM, WZYX),
+//     VT(R8G8_UINT,    8_8_UINT,  R8G8_UINT,  WZYX),
+//     VT(R8G8_SINT,    8_8_SINT,  R8G8_SINT,  WZYX),
+       V_(R8G8_USCALED, 8_8_UINT,  NONE,       WZYX),
+       V_(R8G8_SSCALED, 8_8_SINT,  NONE,       WZYX),
+
+//     _T(L8A8_UINT,    8_8_UINT,  NONE,       WZYX),
+//     _T(L8A8_SINT,    8_8_SINT,  NONE,       WZYX),
+
+       _T(B5G6R5_UNORM,   5_6_5_UNORM,   R5G6B5_UNORM,   WXYZ),
+//     _T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
+//     _T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
+       _T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ),
+
+       /* 24-bit */
+       V_(R8G8B8_UNORM,   8_8_8_UNORM, NONE, WZYX),
+       V_(R8G8B8_SNORM,   8_8_8_SNORM, NONE, WZYX),
+       V_(R8G8B8_UINT,    8_8_8_UINT,  NONE, WZYX),
+       V_(R8G8B8_SINT,    8_8_8_SINT,  NONE, WZYX),
+       V_(R8G8B8_USCALED, 8_8_8_UINT,  NONE, WZYX),
+       V_(R8G8B8_SSCALED, 8_8_8_SINT,  NONE, WZYX),
+
+       /* 32-bit */
+//     VT(R32_UINT,    32_UINT,  R32_UINT, WZYX),
+//     VT(R32_SINT,    32_SINT,  R32_SINT, WZYX),
+       V_(R32_USCALED, 32_UINT,  NONE,     WZYX),
+       V_(R32_SSCALED, 32_UINT,  NONE,     WZYX),
+       VT(R32_FLOAT,   32_FLOAT, R32_FLOAT,WZYX),
+       V_(R32_FIXED,   32_FIXED, NONE,     WZYX),
+
+//     _T(A32_UINT,    32_UINT,  NONE,     WZYX),
+//     _T(A32_SINT,    32_SINT,  NONE,     WZYX),
+//     _T(L32_UINT,    32_UINT,  NONE,     WZYX),
+//     _T(L32_SINT,    32_SINT,  NONE,     WZYX),
+//     _T(I32_UINT,    32_UINT,  NONE,     WZYX),
+//     _T(I32_SINT,    32_SINT,  NONE,     WZYX),
+
+//     VT(R16G16_UNORM,   16_16_UNORM, R16G16_UNORM, WZYX),
+//     VT(R16G16_SNORM,   16_16_SNORM, R16G16_SNORM, WZYX),
+//     VT(R16G16_UINT,    16_16_UINT,  R16G16_UINT,  WZYX),
+//     VT(R16G16_SINT,    16_16_SINT,  R16G16_SINT,  WZYX),
+       V_(R16G16_USCALED, 16_16_UINT,  NONE,         WZYX),
+       V_(R16G16_SSCALED, 16_16_SINT,  NONE,         WZYX),
+       VT(R16G16_FLOAT,   16_16_FLOAT, R16G16_FLOAT, WZYX),
+
+//     _T(L16A16_UNORM,   16_16_UNORM, NONE,         WZYX),
+//     _T(L16A16_SNORM,   16_16_SNORM, NONE,         WZYX),
+//     _T(L16A16_UINT,    16_16_UINT,  NONE,         WZYX),
+//     _T(L16A16_SINT,    16_16_SINT,  NONE,         WZYX),
+
+       VT(R8G8B8A8_UNORM,   8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
+       _T(R8G8B8X8_UNORM,   8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
+       _T(R8G8B8A8_SRGB,    8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
+       _T(R8G8B8X8_SRGB,    8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
+//     VT(R8G8B8A8_SNORM,   8_8_8_8_SNORM, R8G8B8A8_SNORM, WZYX),
+//     VT(R8G8B8A8_UINT,    8_8_8_8_UINT,  R8G8B8A8_UINT,  WZYX),
+//     VT(R8G8B8A8_SINT,    8_8_8_8_SINT,  R8G8B8A8_SINT,  WZYX),
+       V_(R8G8B8A8_USCALED, 8_8_8_8_UINT,  NONE,           WZYX),
+       V_(R8G8B8A8_SSCALED, 8_8_8_8_SINT,  NONE,           WZYX),
+
+       VT(B8G8R8A8_UNORM,   8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
+       _T(B8G8R8X8_UNORM,   8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
+       VT(B8G8R8A8_SRGB,    8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
+       _T(B8G8R8X8_SRGB,    8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
+
+       VT(A8B8G8R8_UNORM,   8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
+       _T(X8B8G8R8_UNORM,   8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
+       _T(A8B8G8R8_SRGB,    8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
+       _T(X8B8G8R8_SRGB,    8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
+
+       VT(A8R8G8B8_UNORM,   8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
+       _T(X8R8G8B8_UNORM,   8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
+       _T(A8R8G8B8_SRGB,    8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
+       _T(X8R8G8B8_SRGB,    8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
+
+//     VT(R10G10B10A2_UNORM,   10_10_10_2_UNORM, R10G10B10A2_UNORM, WZYX),
+//     VT(B10G10R10A2_UNORM,   10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
+//     _T(B10G10R10X2_UNORM,   10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
+//     V_(R10G10B10A2_SNORM,   10_10_10_2_SNORM, NONE,              WZYX),
+//     V_(B10G10R10A2_SNORM,   10_10_10_2_SNORM, NONE,              WXYZ),
+//     VT(R10G10B10A2_UINT,    10_10_10_2_UINT,  R10G10B10A2_UINT,  WZYX),
+//     VT(B10G10R10A2_UINT,    10_10_10_2_UINT,  R10G10B10A2_UINT,  WXYZ),
+//     V_(R10G10B10A2_USCALED, 10_10_10_2_UINT,  NONE,              WZYX),
+//     V_(B10G10R10A2_USCALED, 10_10_10_2_UINT,  NONE,              WXYZ),
+//     V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT,  NONE,              WZYX),
+//     V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT,  NONE,              WXYZ),
+
+//     VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
+//     _T(R9G9B9E5_FLOAT,  9_9_9_E5_FLOAT, NONE,            WZYX),
+
+       _T(Z24X8_UNORM,       X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
+       _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
+       _T(Z32_FLOAT,         32_FLOAT,   R8G8B8A8_UNORM, WZYX),
+       _T(Z32_FLOAT_S8X24_UINT, 32_FLOAT,R8G8B8A8_UNORM, WZYX),
+
+       /* 48-bit */
+       V_(R16G16B16_UNORM,   16_16_16_UNORM, NONE, WZYX),
+       V_(R16G16B16_SNORM,   16_16_16_SNORM, NONE, WZYX),
+       V_(R16G16B16_UINT,    16_16_16_UINT,  NONE, WZYX),
+       V_(R16G16B16_SINT,    16_16_16_SINT,  NONE, WZYX),
+       V_(R16G16B16_USCALED, 16_16_16_UINT,  NONE, WZYX),
+       V_(R16G16B16_SSCALED, 16_16_16_SINT,  NONE, WZYX),
+       V_(R16G16B16_FLOAT,   16_16_16_FLOAT, NONE, WZYX),
+
+       /* 64-bit */
+//     VT(R16G16B16A16_UNORM,   16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX),
+//     VT(R16G16B16X16_UNORM,   16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX),
+//     VT(R16G16B16A16_SNORM,   16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX),
+//     VT(R16G16B16X16_SNORM,   16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX),
+//     VT(R16G16B16A16_UINT,    16_16_16_16_UINT,  R16G16B16A16_UINT,  WZYX),
+//     _T(R16G16B16X16_UINT,    16_16_16_16_UINT,  R16G16B16A16_UINT,  WZYX),
+//     VT(R16G16B16A16_SINT,    16_16_16_16_SINT,  R16G16B16A16_SINT,  WZYX),
+//     _T(R16G16B16X16_SINT,    16_16_16_16_SINT,  R16G16B16A16_SINT,  WZYX),
+       V_(R16G16B16A16_USCALED, 16_16_16_16_UINT,  NONE,               WZYX),
+       V_(R16G16B16A16_SSCALED, 16_16_16_16_SINT,  NONE,               WZYX),
+       VT(R16G16B16A16_FLOAT,   16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX),
+       _T(R16G16B16X16_FLOAT,   16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX),
+
+//     VT(R32G32_UINT,    32_32_UINT,  R32G32_UINT, WZYX),
+//     VT(R32G32_SINT,    32_32_SINT,  R32G32_SINT, WZYX),
+       V_(R32G32_USCALED, 32_32_UINT,  NONE,        WZYX),
+       V_(R32G32_SSCALED, 32_32_SINT,  NONE,        WZYX),
+       VT(R32G32_FLOAT,   32_32_FLOAT, R32G32_FLOAT,WZYX),
+//     V_(R32G32_FIXED,   32_32_FIXED, NONE,        WZYX),
+
+//     _T(L32A32_UINT,    32_32_UINT,  NONE,        WZYX),
+//     _T(L32A32_SINT,    32_32_SINT,  NONE,        WZYX),
+
+       /* 96-bit */
+//     VT(R32G32B32_UINT,    32_32_32_UINT,  NONE, WZYX),
+//     VT(R32G32B32_SINT,    32_32_32_SINT,  NONE, WZYX),
+       V_(R32G32B32_USCALED, 32_32_32_UINT,  NONE, WZYX),
+       V_(R32G32B32_SSCALED, 32_32_32_SINT,  NONE, WZYX),
+       V_(R32G32B32_FLOAT,   32_32_32_FLOAT, NONE, WZYX),
+       V_(R32G32B32_FIXED,   32_32_32_FIXED, NONE, WZYX),
+
+       /* 128-bit */
+       V_(R32G32B32A32_UINT,    32_32_32_32_UINT,  NONE,               WZYX),
+//     _T(R32G32B32X32_UINT,    32_32_32_32_UINT,  R32G32B32A32_UINT,  WZYX),
+//     VT(R32G32B32A32_SINT,    32_32_32_32_SINT,  R32G32B32A32_SINT,  WZYX),
+//     _T(R32G32B32X32_SINT,    32_32_32_32_SINT,  R32G32B32A32_SINT,  WZYX),
+       V_(R32G32B32A32_USCALED, 32_32_32_32_UINT,  NONE,               WZYX),
+       V_(R32G32B32A32_SSCALED, 32_32_32_32_SINT,  NONE,               WZYX),
+       V_(R32G32B32A32_FLOAT,   32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
+       _T(R32G32B32X32_FLOAT,   32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
+       V_(R32G32B32A32_FIXED,   32_32_32_32_FIXED, NONE,               WZYX),
+
+       /* compressed */
+//     _T(ETC1_RGB8, ETC1, NONE, WZYX),
+//     _T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX),
+//     _T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX),
+//     _T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX),
+//     _T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX),
+//     _T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX),
+//     _T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX),
+//     _T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX),
+//     _T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX),
+//     _T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX),
+//     _T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX),
+
+//     _T(DXT1_RGB,   DXT1, NONE, WZYX),
+//     _T(DXT1_SRGB,  DXT1, NONE, WZYX),
+//     _T(DXT1_RGBA,  DXT1, NONE, WZYX),
+//     _T(DXT1_SRGBA, DXT1, NONE, WZYX),
+//     _T(DXT3_RGBA,  DXT3, NONE, WZYX),
+//     _T(DXT3_SRGBA, DXT3, NONE, WZYX),
+//     _T(DXT5_RGBA,  DXT5, NONE, WZYX),
+//     _T(DXT5_SRGBA, DXT5, NONE, WZYX),
+
+//     _T(BPTC_RGBA_UNORM, BPTC,        NONE, WZYX),
+//     _T(BPTC_SRGBA,      BPTC,        NONE, WZYX),
+//     _T(BPTC_RGB_FLOAT,  BPTC_FLOAT,  NONE, WZYX),
+//     _T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX),
+
+//     _T(RGTC1_UNORM, RGTC1_UNORM, NONE, WZYX),
+//     _T(RGTC1_SNORM, RGTC1_SNORM, NONE, WZYX),
+//     _T(RGTC2_UNORM, RGTC2_UNORM, NONE, WZYX),
+//     _T(RGTC2_SNORM, RGTC2_SNORM, NONE, WZYX),
+//     _T(LATC1_UNORM, RGTC1_UNORM, NONE, WZYX),
+//     _T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX),
+//     _T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX),
+//     _T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX),
+
+//     _T(ASTC_4x4,   ASTC_4x4,   NONE, WZYX),
+//     _T(ASTC_5x4,   ASTC_5x4,   NONE, WZYX),
+//     _T(ASTC_5x5,   ASTC_5x5,   NONE, WZYX),
+//     _T(ASTC_6x5,   ASTC_6x5,   NONE, WZYX),
+//     _T(ASTC_6x6,   ASTC_6x6,   NONE, WZYX),
+//     _T(ASTC_8x5,   ASTC_8x5,   NONE, WZYX),
+//     _T(ASTC_8x6,   ASTC_8x6,   NONE, WZYX),
+//     _T(ASTC_8x8,   ASTC_8x8,   NONE, WZYX),
+//     _T(ASTC_10x5,  ASTC_10x5,  NONE, WZYX),
+//     _T(ASTC_10x6,  ASTC_10x6,  NONE, WZYX),
+//     _T(ASTC_10x8,  ASTC_10x8,  NONE, WZYX),
+//     _T(ASTC_10x10, ASTC_10x10, NONE, WZYX),
+//     _T(ASTC_12x10, ASTC_12x10, NONE, WZYX),
+//     _T(ASTC_12x12, ASTC_12x12, NONE, WZYX),
+
+//     _T(ASTC_4x4_SRGB,   ASTC_4x4,   NONE, WZYX),
+//     _T(ASTC_5x4_SRGB,   ASTC_5x4,   NONE, WZYX),
+//     _T(ASTC_5x5_SRGB,   ASTC_5x5,   NONE, WZYX),
+//     _T(ASTC_6x5_SRGB,   ASTC_6x5,   NONE, WZYX),
+//     _T(ASTC_6x6_SRGB,   ASTC_6x6,   NONE, WZYX),
+//     _T(ASTC_8x5_SRGB,   ASTC_8x5,   NONE, WZYX),
+//     _T(ASTC_8x6_SRGB,   ASTC_8x6,   NONE, WZYX),
+//     _T(ASTC_8x8_SRGB,   ASTC_8x8,   NONE, WZYX),
+//     _T(ASTC_10x5_SRGB,  ASTC_10x5,  NONE, WZYX),
+//     _T(ASTC_10x6_SRGB,  ASTC_10x6,  NONE, WZYX),
+//     _T(ASTC_10x8_SRGB,  ASTC_10x8,  NONE, WZYX),
+//     _T(ASTC_10x10_SRGB, ASTC_10x10, NONE, WZYX),
+//     _T(ASTC_12x10_SRGB, ASTC_12x10, NONE, WZYX),
+//     _T(ASTC_12x12_SRGB, ASTC_12x12, NONE, WZYX),
+};
+
+/* Look up the vertex buffer format for a pipe format; yields ~0 when the
+ * format has no populated row in the table.
+ */
+enum a5xx_vtx_fmt
+fd5_pipe2vtx(enum pipe_format format)
+{
+       const struct fd5_format *entry = &formats[format];
+
+       return entry->present ? entry->vtx : ~0;
+}
+
+/* Look up the texture sampler format for a pipe format; yields ~0 when
+ * the format has no populated row in the table.
+ */
+enum a5xx_tex_fmt
+fd5_pipe2tex(enum pipe_format format)
+{
+       const struct fd5_format *entry = &formats[format];
+
+       return entry->present ? entry->tex : ~0;
+}
+
+/* Look up the MRT / copydest format used when a pipe format is a render
+ * target; yields ~0 when the format has no populated row in the table.
+ */
+enum a5xx_color_fmt
+fd5_pipe2color(enum pipe_format format)
+{
+       const struct fd5_format *entry = &formats[format];
+
+       return entry->present ? entry->rb : ~0;
+}
+
+/* Look up the component swap for a pipe format; formats without a
+ * populated table row fall back to WZYX.
+ */
+enum a3xx_color_swap
+fd5_pipe2swap(enum pipe_format format)
+{
+       const struct fd5_format *entry = &formats[format];
+
+       return entry->present ? entry->swap : WZYX;
+}
+
+// XXX possibly same as a4xx..
+/* Pick the texture fetch size for a pipe format.  ASTC layouts always use
+ * 16 bytes; otherwise the choice is keyed on the format's block size in
+ * bits divided by its block width.  Unrecognized sizes are logged and
+ * fall back to the 1-byte fetch size.
+ */
+enum a5xx_tex_fetchsize
+fd5_pipe2fetchsize(enum pipe_format format)
+{
+       unsigned bits;
+
+       /* Z32_FLOAT_S8X24_UINT is sized as if it were plain Z32_FLOAT: */
+       if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+               format = PIPE_FORMAT_Z32_FLOAT;
+
+       if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
+               return TFETCH5_16_BYTE;
+
+       bits = util_format_get_blocksizebits(format) /
+                       util_format_get_blockwidth(format);
+
+       switch (bits) {
+       case 8:
+       case 96:  /* Does this matter? */
+               return TFETCH5_1_BYTE;
+       case 16:
+               return TFETCH5_2_BYTE;
+       case 32:
+               return TFETCH5_4_BYTE;
+       case 64:
+               return TFETCH5_8_BYTE;
+       case 128:
+               return TFETCH5_16_BYTE;
+       default:
+               break;
+       }
+
+       debug_printf("Unknown block size for format %s: %d\n",
+                       util_format_name(format),
+                       util_format_get_blocksizebits(format));
+       return TFETCH5_1_BYTE;
+}
+
+/* Map a depth/stencil pipe format to the a5xx depth format; any format
+ * not listed below yields ~0.
+ */
+enum a5xx_depth_format
+fd5_pipe2depth(enum pipe_format format)
+{
+       enum a5xx_depth_format depth = ~0;
+
+       switch (format) {
+       case PIPE_FORMAT_Z16_UNORM:
+               depth = DEPTH5_16;
+               break;
+       case PIPE_FORMAT_Z24X8_UNORM:
+       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+       case PIPE_FORMAT_X8Z24_UNORM:
+       case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+               depth = DEPTH5_24_8;
+               break;
+       case PIPE_FORMAT_Z32_FLOAT:
+       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+               depth = DEPTH5_32;
+               break;
+       default:
+               break;
+       }
+
+       return depth;
+}
+
+/* Translate one PIPE_SWIZZLE_* selector into the a5xx texture swizzle
+ * enum; unrecognized selectors are treated like PIPE_SWIZZLE_X.
+ */
+static inline enum a5xx_tex_swiz
+tex_swiz(unsigned swiz)
+{
+       switch (swiz) {
+       case PIPE_SWIZZLE_Y:
+               return A5XX_TEX_Y;
+       case PIPE_SWIZZLE_Z:
+               return A5XX_TEX_Z;
+       case PIPE_SWIZZLE_W:
+               return A5XX_TEX_W;
+       case PIPE_SWIZZLE_0:
+               return A5XX_TEX_ZERO;
+       case PIPE_SWIZZLE_1:
+               return A5XX_TEX_ONE;
+       case PIPE_SWIZZLE_X:
+       default:
+               return A5XX_TEX_X;
+       }
+}
+
+/* Build the SWIZ_X/Y/Z/W bits of TEX_CONST_0: the format's own swizzle is
+ * combined with the caller-supplied r/g/b/a swizzle through
+ * util_format_compose_swizzles(), then each resulting selector is
+ * translated with tex_swiz() and packed into its field.
+ */
+uint32_t
+fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
+               unsigned swizzle_b, unsigned swizzle_a)
+{
+       const struct util_format_description *desc =
+                       util_format_description(format);
+       unsigned char requested[4] = {
+                       swizzle_r, swizzle_g, swizzle_b, swizzle_a,
+       };
+       unsigned char composed[4];
+       uint32_t bits = 0;
+
+       util_format_compose_swizzles(desc->swizzle, requested, composed);
+
+       bits |= A5XX_TEX_CONST_0_SWIZ_X(tex_swiz(composed[0]));
+       bits |= A5XX_TEX_CONST_0_SWIZ_Y(tex_swiz(composed[1]));
+       bits |= A5XX_TEX_CONST_0_SWIZ_Z(tex_swiz(composed[2]));
+       bits |= A5XX_TEX_CONST_0_SWIZ_W(tex_swiz(composed[3]));
+
+       return bits;
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_format.h b/src/gallium/drivers/freedreno/a5xx/fd5_format.h
new file mode 100644 (file)
index 0000000..b052aa5
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_UTIL_H_
+#define FD5_UTIL_H_
+
+#include "freedreno_util.h"
+
+#include "a5xx.xml.h"
+
+/* pipe_format -> a5xx hardware enum lookups: */
+enum a5xx_vtx_fmt fd5_pipe2vtx(enum pipe_format format);
+enum a5xx_tex_fmt fd5_pipe2tex(enum pipe_format format);
+enum a5xx_color_fmt fd5_pipe2color(enum pipe_format format);
+enum a3xx_color_swap fd5_pipe2swap(enum pipe_format format);
+enum a5xx_tex_fetchsize fd5_pipe2fetchsize(enum pipe_format format);
+enum a5xx_depth_format fd5_pipe2depth(enum pipe_format format);
+
+/* returns the packed A5XX_TEX_CONST_0 SWIZ_X/Y/Z/W bits for a format
+ * plus a per-component swizzle:
+ */
+uint32_t fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r,
+               unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+
+#endif /* FD5_UTIL_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
new file mode 100644 (file)
index 0000000..d37c9d4
--- /dev/null
@@ -0,0 +1,482 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "freedreno_draw.h"
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+
+#include "fd5_gmem.h"
+#include "fd5_context.h"
+#include "fd5_draw.h"
+#include "fd5_emit.h"
+#include "fd5_program.h"
+#include "fd5_format.h"
+#include "fd5_zsa.h"
+
+/*
+ * Program color render-target state for every one of the
+ * A5XX_MAX_RENDER_TARGETS slots.
+ *
+ * Slots at or beyond nr_bufs, and slots whose bufs[] entry is NULL, are
+ * written with the default (zero) format, pitch and base.  When 'gmem' is
+ * non-NULL the targets are described at their gmem offsets (cbuf_base[])
+ * with bin-sized pitches and TILE5_2 tiling; otherwise they are described
+ * as TILE5_LINEAR in system memory via a reloc to the backing bo.
+ */
+static void
+emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
+               struct pipe_surface **bufs, struct fd_gmem_stateobj *gmem)
+{
+       const enum a5xx_tile_mode tile_mode = gmem ? TILE5_2 : TILE5_LINEAR;
+       unsigned mrt;
+
+       for (mrt = 0; mrt < A5XX_MAX_RENDER_TARGETS; mrt++) {
+               /* only look at bufs[] for slots the caller actually provided: */
+               struct pipe_surface *psurf = (mrt < nr_bufs) ? bufs[mrt] : NULL;
+               struct fd_resource *rsc = NULL;
+               struct fd_resource_slice *slice = NULL;
+               enum a5xx_color_fmt format = 0;
+               enum a3xx_color_swap swap = WZYX;
+               bool srgb = false;
+               uint32_t stride = 0, size = 0;
+               uint32_t base = 0, offset = 0;
+               uint32_t buf_info;
+
+               if (psurf) {
+                       enum pipe_format pformat = psurf->format;
+
+                       rsc = fd_resource(psurf->texture);
+                       slice = fd_resource_slice(rsc, psurf->u.tex.level);
+
+                       format = fd5_pipe2color(pformat);
+                       swap = fd5_pipe2swap(pformat);
+                       srgb = util_format_is_srgb(pformat);
+
+                       /* layered rendering is not handled here: */
+                       debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+                       offset = fd_resource_offset(rsc, psurf->u.tex.level,
+                                       psurf->u.tex.first_layer);
+
+                       if (gmem) {
+                               stride = gmem->bin_w * rsc->cpp;
+                               size = stride * gmem->bin_h;
+                               base = gmem->cbuf_base[mrt];
+                       } else {
+                               stride = slice->pitch * rsc->cpp;
+                               size = slice->size0;
+                       }
+               }
+
+               buf_info = A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
+                               A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
+                               A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
+                               0x800; /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
+               if (srgb)
+                       buf_info |= A5XX_RB_MRT_BUF_INFO_COLOR_SRGB;
+
+               OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(mrt), 5);
+               OUT_RING(ring, buf_info);
+               OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
+               OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
+               if (!gmem && psurf) {
+                       /* system memory target, address comes from the bo: */
+                       OUT_RELOCW(ring, rsc->bo, offset, 0, 0);  /* BASE_LO/HI */
+               } else {
+                       /* gmem offset, or zero for an unused slot: */
+                       OUT_RING(ring, base);           /* RB_MRT[i].BASE_LO */
+                       OUT_RING(ring, 0x00000000);     /* RB_MRT[i].BASE_HI */
+               }
+
+               OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(mrt), 1);
+               OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format));
+
+               /* when we support UBWC, these would be the system memory
+                * addr/pitch/etc:
+                */
+               OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(mrt), 4);
+               OUT_RING(ring, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
+               OUT_RING(ring, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
+               OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
+               OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
+       }
+}
+
+/*
+ * Emit depth/stencil buffer state.
+ *
+ * ring:  ringbuffer to emit into
+ * zsbuf: depth/stencil surface, or NULL when none is bound (in which case
+ *        DEPTH5_NONE and zeroed base/pitch/stencil-info are written)
+ * gmem:  when non-NULL the buffers are described at their gmem offsets
+ *        (zsbuf_base[0] for depth, zsbuf_base[1] for separate stencil)
+ *        with bin-sized pitches; when NULL they are described in system
+ *        memory via relocs to the backing bo(s).
+ */
+static void
+emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
+               struct fd_gmem_stateobj *gmem)
+{
+       if (zsbuf) {
+               struct fd_resource *rsc = fd_resource(zsbuf->texture);
+               enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
+               uint32_t cpp = rsc->cpp;
+               uint32_t stride = 0;
+               uint32_t size = 0;
+
+               if (gmem) {
+                       stride = cpp * gmem->bin_w;
+                       size = stride * gmem->bin_h;
+               } else {
+                       struct fd_resource_slice *slice = fd_resource_slice(rsc, 0);
+                       stride = slice->pitch * rsc->cpp;
+                       size = slice->size0;
+               }
+
+               OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
+               OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
+               if (gmem) {
+                       OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
+                       OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
+               } else {
+                       OUT_RELOCW(ring, rsc->bo, 0, 0, 0);  /* RB_DEPTH_BUFFER_BASE_LO/HI */
+               }
+               OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
+               OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));
+
+               OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
+               OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
+
+               /* no depth flag buffer (compression metadata) is used: */
+               OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
+               OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
+               OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
+               OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_PITCH */
+
+               if (rsc->stencil) {
+                       /* separate stencil lives in its own resource: */
+                       if (gmem) {
+                               /* one byte per pixel in gmem: */
+                               stride = 1 * gmem->bin_w;
+                               size = stride * gmem->bin_h;
+                       } else {
+                               struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0);
+                               /* the slice belongs to the stencil resource, so the
+                                * byte pitch must use the stencil resource's cpp
+                                * rather than the depth resource's:
+                                */
+                               stride = slice->pitch * rsc->stencil->cpp;
+                               size = slice->size0;
+                       }
+
+                       OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
+                       OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
+                       if (gmem) {
+                               OUT_RING(ring, gmem->zsbuf_base[1]);  /* RB_STENCIL_BASE_LO */
+                               OUT_RING(ring, 0x00000000);           /* RB_STENCIL_BASE_HI */
+                       } else {
+                               OUT_RELOCW(ring, rsc->stencil->bo, 0, 0, 0);  /* RB_STENCIL_BASE_LO/HI */
+                       }
+                       OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
+                       OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
+               } else {
+                       OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
+                       OUT_RING(ring, 0x00000000);     /* RB_STENCIL_INFO */
+               }
+       } else {
+               /* nothing bound: disable depth and stencil */
+               OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
+               OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
+               OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_BASE_LO */
+               OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_BASE_HI */
+               OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_PITCH */
+               OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_ARRAY_PITCH */
+
+               OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
+               OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
+
+               OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
+               OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
+               OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
+               OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_PITCH */
+
+               OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
+               OUT_RING(ring, 0x00000000);     /* RB_STENCIL_INFO */
+       }
+}
+
+/*
+ * Finalize every recorded draw patch by OR-ing the given visibility
+ * cull mode into the saved dword and writing it back into the
+ * cmdstream, then empty the patch list.
+ */
+static void
+patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
+{
+       unsigned n = 0;
+
+       while (n < fd_patch_num_elements(&batch->draw_patches)) {
+               struct fd_cs_patch *p = fd_patch_element(&batch->draw_patches, n);
+
+               *p->cs = p->val | DRAW4(0, 0, 0, vismode);
+               n++;
+       }
+
+       /* all patches consumed: */
+       util_dynarray_resize(&batch->draw_patches, 0);
+}
+
+/* before first tile
+ *
+ * Installed as ctx->emit_tile_init by fd5_gmem_init().  Emits one-time
+ * setup into batch->gmem: restore state, a handful of fixed register
+ * values, and finally the switch to GMEM render mode.
+ */
+static void
+fd5_emit_tile_init(struct fd_batch *batch)
+{
+       struct fd_ringbuffer *ring = batch->gmem;
+
+       fd5_emit_restore(batch, ring);
+
+       /* event id is not yet understood (hence the UNK_ name): */
+       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+       OUT_RING(ring, UNK_26);
+
+       OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+       OUT_RING(ring, 0x0);
+
+       OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
+       OUT_RING(ring, 0x00000003);   /* PC_POWER_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
+       OUT_RING(ring, 0x00000003);   /* VFD_POWER_CNTL */
+
+       /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
+       fd_wfi(batch, ring);
+       OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
+       OUT_RING(ring, 0x7c13c080);   /* RB_CCU_CNTL */
+
+       /* NOTE(review): leftover trace note; presumably a packet that is
+        * not (yet) emitted at this point -- confirm:
+        *
+        *   opcode: CP_PREEMPT_ENABLE_LOCAL (6a) (2 dwords)
+        */
+
+       fd5_set_render_mode(batch->ctx, ring, GMEM);
+}
+
+/* before mem2gmem
+ *
+ * Programs the window scissor, the resolve rectangle and the window
+ * offset so they all cover the given tile.  The bottom-right corner is
+ * inclusive, hence the "- 1".
+ */
+static void
+fd5_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
+{
+       struct fd_ringbuffer *ring = batch->gmem;
+
+       const uint32_t left = tile->xoff;
+       const uint32_t top = tile->yoff;
+       const uint32_t right = left + tile->bin_w - 1;
+       const uint32_t bottom = top + tile->bin_h - 1;
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+       OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(left) |
+                       A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(top));
+       OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(right) |
+                       A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bottom));
+
+       OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
+       OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(left) |
+                       A5XX_RB_RESOLVE_CNTL_1_Y(top));
+       OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(right) |
+                       A5XX_RB_RESOLVE_CNTL_2_Y(bottom));
+
+       OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
+       OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(left) |
+                       A5XX_RB_WINDOW_OFFSET_Y(top));
+}
+
+
+/*
+ * transfer from system memory to gmem
+ */
+
+/*
+ * Emit a blit that restores one surface into gmem.
+ *
+ * batch: batch whose gmem ringbuffer receives the commands
+ * base:  gmem offset, written as the blit destination address
+ * psurf: surface being restored (must be a single layer)
+ * buf:   which blit buffer to select in RB_BLIT_CNTL
+ *
+ * No source address is programmed here.
+ * NOTE(review): presumably the blit reads from the MRT/ZS state the
+ * caller set up beforehand via emit_mrt()/emit_zs() -- confirm.
+ */
+static void
+emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
+               struct pipe_surface *psurf, enum a5xx_blit_buf buf)
+{
+       struct fd_ringbuffer *ring = batch->gmem;
+       struct fd_resource *rsc = fd_resource(psurf->texture);
+       struct fd_resource_slice *slice;
+
+       slice = fd_resource_slice(rsc, psurf->u.tex.level);
+
+       debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+       /* no flag buffer for the destination: */
+       OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
+       OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_LO */
+       OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_HI */
+       OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_PITCH */
+       OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_ARRAY_PITCH */
+
+       /* NOTE(review): the destination is a gmem offset, yet pitch and
+        * array pitch below come from the system memory slice rather than
+        * from the bin dimensions -- confirm this is what the hw expects.
+        */
+       OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
+       OUT_RING(ring, 0x00000000);   /* RB_RESOLVE_CNTL_3 */
+       OUT_RING(ring, base);         /* RB_BLIT_DST_LO */
+       OUT_RING(ring, 0x00000000);   /* RB_BLIT_DST_HI */
+       OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(slice->pitch * rsc->cpp));
+       OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));
+
+       OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+       OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));
+
+       fd5_emit_blit(batch->ctx, ring);
+}
+
+/*
+ * Per-tile restore: point the MRT and depth/stencil state at system
+ * memory, put RB_CNTL in bypass with the bin dimensions, then blit each
+ * buffer flagged in batch->restore into its gmem location.
+ */
+static void
+fd5_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
+{
+       struct fd_ringbuffer *ring = batch->gmem;
+       struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
+       struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
+       /*
+        * setup mrt and zs with system memory base addresses:
+        */
+
+       emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
+       emit_zs(ring, pfb->zsbuf, NULL);
+
+       OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+       OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
+                       A5XX_RB_CNTL_HEIGHT(gmem->bin_h) |
+                       A5XX_RB_CNTL_BYPASS);
+
+       if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
+               unsigned n;
+
+               for (n = 0; n < pfb->nr_cbufs; n++) {
+                       struct pipe_surface *cbuf = pfb->cbufs[n];
+
+                       /* only bound color buffers that are flagged for restore: */
+                       if (cbuf && (batch->restore & (PIPE_CLEAR_COLOR0 << n))) {
+                               emit_mem2gmem_surf(batch, gmem->cbuf_base[n],
+                                               cbuf, BLIT_MRT0 + n);
+                       }
+               }
+       }
+
+       if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+               struct fd_resource *zs = fd_resource(pfb->zsbuf->texture);
+               const bool separate_stencil = (zs->stencil != NULL);
+
+               // XXX BLIT_ZS vs BLIT_Z32 .. need some more cmdstream traces
+               // with z32_x24s8..
+               if (!separate_stencil || (batch->restore & FD_BUFFER_DEPTH))
+                       emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
+               if (separate_stencil && (batch->restore & FD_BUFFER_STENCIL))
+                       emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_ZS);
+       }
+}
+
+
+/* before IB to rendering cmds:
+ *
+ * Installed as ctx->emit_tile_renderprep by fd5_gmem_init().  Sets
+ * RB_CNTL to the bin dimensions without the BYPASS bit, finalizes the
+ * recorded draw patches, re-emits depth/stencil and MRT state with gmem
+ * addresses, and programs single-sample MSAA state.  The 'tile'
+ * argument is not used by this implementation.
+ */
+static void
+fd5_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
+{
+       struct fd_ringbuffer *ring = batch->gmem;
+       struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
+       struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
+       OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+       OUT_RING(ring, 0x1);
+
+       OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+       OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
+                       A5XX_RB_CNTL_HEIGHT(gmem->bin_h));
+
+       /* draws are patched to ignore visibility: */
+       patch_draws(batch, IGNORE_VISIBILITY);
+
+       /* passing gmem selects the in-gmem base addresses and pitches: */
+       emit_zs(ring, pfb->zsbuf, gmem);
+       emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);
+
+       // TODO MSAA
+       /* until then, the same MSAA_ONE + MSAA_DISABLE values go to the
+        * TPL1, RB and GRAS_SC blocks:
+        */
+       OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
+       OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
+       OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
+                       A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);
+
+       OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
+       OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
+       OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
+                       A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
+       OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
+       OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
+                       A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);
+}
+
+
+/*
+ * transfer from gmem to system memory (ie. normal RAM)
+ */
+
+/*
+ * Emit a blit that resolves one surface out to system memory.
+ *
+ * batch: batch whose gmem ringbuffer receives the commands
+ * base:  gmem offset of the surface; not referenced in the body below
+ * psurf: surface being resolved (must be a single layer)
+ * buf:   which blit buffer to select in RB_BLIT_CNTL
+ *
+ * The destination is the surface's bo at the offset of its level and
+ * first layer, with the slice's pitch and size.
+ */
+static void
+emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
+               struct pipe_surface *psurf, enum a5xx_blit_buf buf)
+{
+       struct fd_ringbuffer *ring = batch->gmem;
+       struct fd_resource *rsc = fd_resource(psurf->texture);
+       struct fd_resource_slice *slice;
+       uint32_t offset;
+
+       slice = fd_resource_slice(rsc, psurf->u.tex.level);
+       offset = fd_resource_offset(rsc, psurf->u.tex.level,
+                       psurf->u.tex.first_layer);
+
+       debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+       /* no flag buffer for the destination: */
+       OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
+       OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_LO */
+       OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_HI */
+       OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_PITCH */
+       OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_ARRAY_PITCH */
+
+       /* the reloc fills both the LO and HI destination dwords: */
+       OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
+       OUT_RING(ring, 0x00000004);   /* XXX RB_RESOLVE_CNTL_3 */
+       OUT_RELOCW(ring, rsc->bo, offset, 0, 0);     /* RB_BLIT_DST_LO/HI */
+       OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(slice->pitch * rsc->cpp));
+       OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));
+
+       OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+       OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));
+
+       fd5_emit_blit(batch->ctx, ring);
+}
+
+/*
+ * Per-tile resolve: blit every buffer flagged in batch->resolve from
+ * gmem back out to system memory, depth/stencil first and then the
+ * color buffers.
+ */
+static void
+fd5_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
+{
+       struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
+       struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
+       if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+               struct fd_resource *zs = fd_resource(pfb->zsbuf->texture);
+               const bool separate_stencil = (zs->stencil != NULL);
+
+               // XXX BLIT_ZS vs BLIT_Z32 .. need some more cmdstream traces
+               // with z32_x24s8..
+               if (!separate_stencil || (batch->resolve & FD_BUFFER_DEPTH))
+                       emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
+               if (separate_stencil && (batch->resolve & FD_BUFFER_STENCIL))
+                       emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_ZS);
+       }
+
+       if (batch->resolve & FD_BUFFER_COLOR) {
+               unsigned n;
+
+               for (n = 0; n < pfb->nr_cbufs; n++) {
+                       struct pipe_surface *cbuf = pfb->cbufs[n];
+
+                       /* only bound color buffers that are flagged for resolve: */
+                       if (cbuf && (batch->resolve & (PIPE_CLEAR_COLOR0 << n))) {
+                               emit_gmem2mem_surf(batch, gmem->cbuf_base[n],
+                                               cbuf, BLIT_MRT0 + n);
+                       }
+               }
+       }
+}
+
+/* after the last tile: flush caches and drop back to BYPASS render mode */
+static void
+fd5_emit_tile_fini(struct fd_batch *batch)
+{
+       struct fd_ringbuffer *ring = batch->gmem;
+
+       fd5_cache_flush(batch, ring);
+       fd5_set_render_mode(batch->ctx, ring, BYPASS);
+}
+
+/*
+ * Install the a5xx per-tile callbacks on the freedreno context that
+ * wraps 'pctx'.
+ */
+void
+fd5_gmem_init(struct pipe_context *pctx)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       /* once per batch: */
+       fdctx->emit_tile_init = fd5_emit_tile_init;
+       fdctx->emit_tile_fini = fd5_emit_tile_fini;
+
+       /* once per tile: */
+       fdctx->emit_tile_prep = fd5_emit_tile_prep;
+       fdctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
+       fdctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
+       fdctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.h b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.h
new file mode 100644 (file)
index 0000000..7794bfb
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2015 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_GMEM_H_
+#define FD5_GMEM_H_
+
+#include "pipe/p_context.h"
+
+/* Installs the a5xx emit_tile_* callbacks on the context behind pctx. */
+void fd5_gmem_init(struct pipe_context *pctx);
+
+#endif /* FD5_GMEM_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
new file mode 100644 (file)
index 0000000..dbb1a7c
--- /dev/null
@@ -0,0 +1,608 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/bitset.h"
+
+#include "freedreno_program.h"
+
+#include "fd5_program.h"
+#include "fd5_emit.h"
+#include "fd5_texture.h"
+#include "fd5_format.h"
+
+/*
+ * Destroy a shader state object made by create_shader_stateobj(): the
+ * ir3 shader it wraps is destroyed and the wrapper itself released.
+ * A NULL 'so' is ignored.
+ */
+static void
+delete_shader_stateobj(struct fd5_shader_stateobj *so)
+{
+       if (!so)
+               return;
+
+       ir3_shader_destroy(so->shader);
+       /* allocated with CALLOC_STRUCT(), so release with the matching FREE(): */
+       FREE(so);
+}
+
+/*
+ * Allocate a shader state object and create the ir3 shader of the given
+ * type for 'cso', using the screen's compiler and the context's debug
+ * callback.
+ *
+ * Returns the new object, or NULL if the allocation fails.
+ */
+static struct fd5_shader_stateobj *
+create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso,
+               enum shader_t type)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct ir3_compiler *compiler = ctx->screen->compiler;
+       struct fd5_shader_stateobj *so = CALLOC_STRUCT(fd5_shader_stateobj);
+
+       /* don't write through a failed allocation: */
+       if (!so)
+               return NULL;
+
+       so->shader = ir3_shader_create(compiler, cso, type, &ctx->debug);
+       return so;
+}
+
+/* Create the state object for a fragment shader; returned as an opaque pointer. */
+static void *
+fd5_fp_state_create(struct pipe_context *pctx,
+               const struct pipe_shader_state *cso)
+{
+       struct fd5_shader_stateobj *fs_so =
+               create_shader_stateobj(pctx, cso, SHADER_FRAGMENT);
+
+       return fs_so;
+}
+
+/* Destroy a fragment shader state object; 'hwcso' is a fd5_shader_stateobj. */
+static void
+fd5_fp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       delete_shader_stateobj(hwcso);
+}
+
+/* Create the state object for a vertex shader; returned as an opaque pointer. */
+static void *
+fd5_vp_state_create(struct pipe_context *pctx,
+               const struct pipe_shader_state *cso)
+{
+       struct fd5_shader_stateobj *vs_so =
+               create_shader_stateobj(pctx, cso, SHADER_VERTEX);
+
+       return vs_so;
+}
+
+/* Destroy a vertex shader state object; 'hwcso' is a fd5_shader_stateobj. */
+static void
+fd5_vp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       delete_shader_stateobj(hwcso);
+}
+
+/*
+ * Emit the CP_LOAD_STATE packet that loads a shader variant's
+ * instructions.
+ *
+ * By default the packet only references the variant's bo through a
+ * reloc.  With FD_DBG_DIRECT set in fd_mesa_debug, the instruction
+ * dwords are instead copied inline into the packet from the mapped bo.
+ */
+static void
+emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
+{
+       const enum adreno_state_block sb =
+               (so->type == SHADER_VERTEX) ? SB_VERT_SHADER : SB_FRAG_SHADER;
+       enum adreno_state_src src = 2;  // enums different on a5xx..
+       uint32_t *bin = NULL;
+       uint32_t sz = 0;
+       uint32_t dw;
+
+       if (fd_mesa_debug & FD_DBG_DIRECT) {
+               /* inline payload: */
+               sz = so->info.sizedwords;
+               src = SS_DIRECT;
+               bin = fd_bo_map(so->bo);
+       }
+
+       OUT_PKT7(ring, CP_LOAD_STATE, 3 + sz);
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+                       CP_LOAD_STATE_0_STATE_SRC(src) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                       CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
+       if (!bin) {
+               OUT_RELOC(ring, so->bo, 0,
+                               CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
+       } else {
+               OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+                               CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
+               OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0));
+       }
+
+       /* for how clever coverity is, it is sometimes rather dull, and
+        * doesn't realize that the only case where bin==NULL, sz==0:
+        */
+       assume(bin || (sz == 0));
+
+       for (dw = 0; dw < sz; dw++) {
+               OUT_RING(ring, bin[dw]);
+       }
+}
+
+/*
+ * Per-shader-stage layout worked out by setup_stages(): which variant is
+ * bound, and where its constants and instructions are placed.
+ */
+struct stage {
+       /* bound variant, or NULL when the stage is unused */
+       const struct ir3_shader_variant *v;
+       /* points at v->info, or NULL when v is NULL */
+       const struct ir3_info *i;
+       /* const sizes are in units of 4 * vec4 */
+       uint8_t constoff;
+       uint8_t constlen;
+       /* instr sizes are in units of 16 instructions */
+       uint8_t instroff;
+       /* setup_stages() zeroes instrlen for a stage it has decided to run
+        * from external memory
+        */
+       uint8_t instrlen;
+};
+
+/*
+ * Indices into the 'struct stage' array.  setup_stages() currently only
+ * binds variants for VS and FS; HS/DS/GS are left NULL.
+ */
+enum {
+       VS = 0,
+       FS = 1,
+       HS = 2,
+       DS = 3,
+       GS = 4,
+       MAX_STAGES  /* number of stages, used as the array size */
+};
+
+/*
+ * Fill in s[0..MAX_STAGES-1] from the emit state: bind the VS/FS
+ * variants, derive each stage's const and instruction sizes, then assign
+ * const and instruction offsets.
+ */
+static void
+setup_stages(struct fd5_emit *emit, struct stage *s)
+{
+       unsigned next_const = 0;
+       unsigned n;
+
+       s[VS].v = fd5_emit_get_vp(emit);
+       s[FS].v = fd5_emit_get_fp(emit);
+
+       /* for now */
+       s[HS].v = NULL;
+       s[DS].v = NULL;
+       s[GS].v = NULL;
+
+       for (n = 0; n < MAX_STAGES; n++) {
+               const struct ir3_shader_variant *v = s[n].v;
+
+               s[n].i = v ? &v->info : NULL;
+               /* constlen is in units of 4 * vec4: */
+               s[n].constlen = v ? (align(v->constlen, 4) / 4) : 0;
+               /* instrlen is already in units of 16 instr.. although
+                * probably we should ditch that and not make the compiler
+                * care about instruction group size of a3xx vs a5xx
+                */
+               s[n].instrlen = v ? v->instrlen : 0;
+
+               /* const space is handed out back-to-back in stage order: */
+               s[n].constoff = next_const;
+               next_const += s[n].constlen;
+       }
+
+       /* NOTE: at least for gles2, blob partitions VS at bottom of const
+        * space and FS taking entire remaining space.  We probably don't
+        * need to do that the same way, but for now mimic what the blob
+        * does to make it easier to diff against register values from blob
+        *
+        * NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders
+        * is run from external memory.
+        */
+       if ((s[VS].instrlen + s[FS].instrlen) > 64) {
+               const bool fs_fits = (s[FS].instrlen < 64);
+               const bool vs_fits = (s[VS].instrlen < 64);
+
+               /* prioritize FS for internal memory: it is only kicked out
+                * to external memory when it cannot fit on its own..
+                */
+               if (!fs_fits)
+                       s[FS].instrlen = 0;
+
+               /* ..and VS is kicked out either to make room for FS, or
+                * because it cannot fit on its own either:
+                */
+               if (fs_fits || !vs_fits)
+                       s[VS].instrlen = 0;
+       }
+
+       /* VS instructions start at the bottom, FS is packed against the
+        * top of the 64 unit space, and the unused stages share FS's offset:
+        */
+       s[VS].instroff = 0;
+       s[FS].instroff = 64 - s[FS].instrlen;
+       s[HS].instroff = s[FS].instroff;
+       s[DS].instroff = s[FS].instroff;
+       s[GS].instroff = s[FS].instroff;
+}
+
+void
+fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit,
+               int nr, struct pipe_surface **bufs)
+{
+       struct stage s[MAX_STAGES];
+       uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
+       uint32_t face_regid, coord_regid, zwcoord_regid;
+       uint32_t vcoord_regid, vertex_regid, instance_regid;
+       int i, j;
+
+       debug_assert(nr <= ARRAY_SIZE(color_regid));
+
+       if (emit->key.binning_pass)
+               nr = 0;
+
+       setup_stages(emit, s);
+
+       pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
+       posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH);
+       psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
+       vertex_regid = ir3_find_output_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
+       instance_regid = ir3_find_output_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID);
+
+       if (s[FS].v->color0_mrt) {
+               color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
+               color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
+                       ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
+       } else {
+               color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
+               color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
+               color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
+               color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
+               color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
+               color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
+               color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
+               color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
+       }
+
+       /* TODO get these dynamically: */
+       face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
+       coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
+       zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);
+       vcoord_regid = (s[FS].v->total_in > 0) ? regid(0,0) : regid(63,0);
+
+       /* we could probably divide this up into things that need to be
+        * emitted if frag-prog is dirty vs if vert-prog is dirty..
+        */
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONTROL_REG, 5);
+       OUT_RING(ring, A5XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
+                       A5XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff) |
+                       COND(s[VS].v, A5XX_HLSQ_VS_CONTROL_REG_ENABLED));
+       OUT_RING(ring, A5XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
+                       A5XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff) |
+                       COND(s[FS].v, A5XX_HLSQ_FS_CONTROL_REG_ENABLED));
+       OUT_RING(ring, A5XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
+                       A5XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff) |
+                       COND(s[HS].v, A5XX_HLSQ_HS_CONTROL_REG_ENABLED));
+       OUT_RING(ring, A5XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
+                       A5XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff) |
+                       COND(s[DS].v, A5XX_HLSQ_DS_CONTROL_REG_ENABLED));
+       OUT_RING(ring, A5XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
+                       A5XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff) |
+                       COND(s[GS].v, A5XX_HLSQ_GS_CONTROL_REG_ENABLED));
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
+       OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen));
+       OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen));
+       OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen));
+       OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen));
+       OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen));
+
+       OUT_PKT4(ring, REG_A5XX_SP_VS_CONTROL_REG, 5);
+       OUT_RING(ring, A5XX_SP_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
+                       A5XX_SP_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff) |
+                       COND(s[VS].v, A5XX_SP_VS_CONTROL_REG_ENABLED));
+       OUT_RING(ring, A5XX_SP_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
+                       A5XX_SP_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff) |
+                       COND(s[FS].v, A5XX_SP_FS_CONTROL_REG_ENABLED));
+       OUT_RING(ring, A5XX_SP_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
+                       A5XX_SP_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff) |
+                       COND(s[HS].v, A5XX_SP_HS_CONTROL_REG_ENABLED));
+       OUT_RING(ring, A5XX_SP_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
+                       A5XX_SP_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff) |
+                       COND(s[DS].v, A5XX_SP_DS_CONTROL_REG_ENABLED));
+       OUT_RING(ring, A5XX_SP_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
+                       A5XX_SP_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff) |
+                       COND(s[GS].v, A5XX_SP_GS_CONTROL_REG_ENABLED));
+
+       OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONSTLEN, 2);
+       OUT_RING(ring, s[VS].constlen);    /* HLSQ_VS_CONSTLEN */
+       OUT_RING(ring, s[VS].instrlen);    /* HLSQ_VS_INSTRLEN */
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_FS_CONSTLEN, 2);
+       OUT_RING(ring, s[FS].constlen);    /* HLSQ_FS_CONSTLEN */
+       OUT_RING(ring, s[FS].instrlen);    /* HLSQ_FS_INSTRLEN */
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_HS_CONSTLEN, 2);
+       OUT_RING(ring, s[HS].constlen);    /* HLSQ_HS_CONSTLEN */
+       OUT_RING(ring, s[HS].instrlen);    /* HLSQ_HS_INSTRLEN */
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_DS_CONSTLEN, 2);
+       OUT_RING(ring, s[DS].constlen);    /* HLSQ_DS_CONSTLEN */
+       OUT_RING(ring, s[DS].instrlen);    /* HLSQ_DS_INSTRLEN */
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_GS_CONSTLEN, 2);
+       OUT_RING(ring, s[GS].constlen);    /* HLSQ_GS_CONSTLEN */
+       OUT_RING(ring, s[GS].instrlen);    /* HLSQ_GS_INSTRLEN */
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_CONTEXT_SWITCH_CS_SW_3, 2);
+       OUT_RING(ring, 0x00000000);   /* HLSQ_CONTEXT_SWITCH_CS_SW_3 */
+       OUT_RING(ring, 0x00000000);   /* HLSQ_CONTEXT_SWITCH_CS_SW_4 */
+
+       OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1);
+       OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
+                       A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
+                       0x6 | /* XXX seems to be always set? */
+                       A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+                       COND(s[VS].v->has_samp, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+
+       struct ir3_shader_linkage l = {0};
+       ir3_link_shaders(&l, s[VS].v, s[FS].v);
+
+       /* a5xx appends pos/psize to end of the linkage map: */
+       if (pos_regid != regid(63,0))
+               ir3_link_add(&l, pos_regid, 0xf, l.max_loc);
+
+       if (psize_regid != regid(63,0))
+               ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
+
+       for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
+               uint32_t reg = 0;
+
+               OUT_PKT4(ring, REG_A5XX_SP_VS_OUT_REG(i), 1);
+
+               reg |= A5XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
+               reg |= A5XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
+               j++;
+
+               reg |= A5XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
+               reg |= A5XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
+               j++;
+
+               OUT_RING(ring, reg);
+       }
+
+       for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
+               uint32_t reg = 0;
+
+               OUT_PKT4(ring, REG_A5XX_SP_VS_VPC_DST_REG(i), 1);
+
+               reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
+               reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
+               reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
+               reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);
+
+               OUT_RING(ring, reg);
+       }
+
+       OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2);
+       OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0);  /* SP_VS_OBJ_START_LO/HI */
+
+       if (s[VS].instrlen)
+               emit_shader(ring, s[VS].v);
+
+       BITSET_DECLARE(varbs, 128) = {0};
+       uint32_t *varmask = (uint32_t *)varbs;
+
+       for (i = 0; i < l.cnt; i++)
+               for (j = 0; j < util_last_bit(l.var[i].compmask); j++)
+                       BITSET_SET(varbs, l.var[i].loc + j);
+
+       OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4);
+       OUT_RING(ring, ~varmask[0]);  /* VPC_VAR[0].DISABLE */
+       OUT_RING(ring, ~varmask[1]);  /* VPC_VAR[1].DISABLE */
+       OUT_RING(ring, ~varmask[2]);  /* VPC_VAR[2].DISABLE */
+       OUT_RING(ring, ~varmask[3]);  /* VPC_VAR[3].DISABLE */
+
+       // TODO depending on other bits in this reg (if any) set somewhere else?
+       OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1);
+       OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE));
+
+       if (emit->key.binning_pass) {
+               OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
+               OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_LO */
+               OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_HI */
+       } else {
+               uint32_t stride_in_vpc = align(s[FS].v->total_in, 4) + 4;
+
+               if (s[VS].v->writes_psize)
+                       stride_in_vpc++;
+
+               // TODO if some of these other bits depend on something other than
+               // program state we should probably move these next three regs:
+
+               OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1);
+               OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt));
+
+               OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1);
+               OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(stride_in_vpc) |
+                               COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) |
+                               0x10000);    // XXX
+
+               OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1);
+               OUT_RING(ring, A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(stride_in_vpc) |
+                               0x400);      // XXX
+
+               OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
+               OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0);  /* SP_FS_OBJ_START_LO/HI */
+       }
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5);
+       OUT_RING(ring, 0x00000881);        /* XXX HLSQ_CONTROL_0 */
+       OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63));
+       OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
+                       0xfcfcfc00);               /* XXX */
+       OUT_RING(ring, A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(vcoord_regid) |
+                       0xfcfcfc00);               /* XXX */
+       OUT_RING(ring, A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
+                       A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
+                       0x0000fcfc);               /* XXX */
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
+       OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_GRAS_CNTL_VARYING));
+
+       OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1);
+       OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
+                       0x4000e | /* XXX set pretty much everywhere */
+                       A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
+                       A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
+                       A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+                       COND(s[FS].v->has_samp, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
+       OUT_RING(ring, 0x020fffff);        /* XXX */
+
+       OUT_PKT4(ring, REG_A5XX_VPC_GS_SIV_CNTL, 1);
+       OUT_RING(ring, 0x0000ffff);        /* XXX */
+
+       OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
+       OUT_RING(ring, 0x00000010);        /* XXX */
+
+       OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 3);
+       OUT_RING(ring,
+                       COND(s[FS].v->total_in > 0, A5XX_RB_RENDER_CONTROL0_VARYING) |
+                       COND(s[FS].v->frag_coord, A5XX_RB_RENDER_CONTROL0_XCOORD |
+                                       A5XX_RB_RENDER_CONTROL0_YCOORD |
+                                       A5XX_RB_RENDER_CONTROL0_ZCOORD |
+                                       A5XX_RB_RENDER_CONTROL0_WCOORD));
+       OUT_RING(ring,
+                       COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL1_FACENESS));
+       OUT_RING(ring, A5XX_RB_FS_OUTPUT_CNTL_MRT(nr) |
+                       COND(s[FS].v->writes_pos, A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z));
+
+       OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_CNTL, 9);
+       OUT_RING(ring, A5XX_SP_FS_OUTPUT_CNTL_MRT(nr) |
+                       A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(posz_regid) |
+                       A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(regid(63, 0)));
+       for (i = 0; i < 8; i++) {
+               OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
+                               COND(emit->key.half_precision,
+                                       A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
+       }
+
+       if (emit->key.binning_pass) {
+               OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
+               OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(0));
+       } else {
+               uint32_t vinterp[8], vpsrepl[8];
+
+               memset(vinterp, 0, sizeof(vinterp));
+               memset(vpsrepl, 0, sizeof(vpsrepl));
+
+               /* looks like we need to do int varyings in the frag
+                * shader on a5xx (no flatshad reg?  or a420.0 bug?):
+                *
+                *    (sy)(ss)nop
+                *    (sy)ldlv.u32 r0.x,l[r0.x], 1
+                *    ldlv.u32 r0.y,l[r0.x+1], 1
+                *    (ss)bary.f (ei)r63.x, 0, r0.x
+                *    (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
+                *    (rpt5)nop
+                *    sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
+                *
+                * Possibly on later a5xx variants we'll be able to use
+                * something like the code below instead of workaround
+                * in the shader:
+                */
+               /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
+               for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
+                       /* NOTE: varyings are packed, so if compmask is 0xb
+                        * then first, third, and fourth component occupy
+                        * three consecutive varying slots:
+                        */
+                       unsigned compmask = s[FS].v->inputs[j].compmask;
+
+                       uint32_t inloc = s[FS].v->inputs[j].inloc;
+
+                       if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) ||
+                                       (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
+                               uint32_t loc = inloc;
+
+                               for (i = 0; i < 4; i++) {
+                                       if (compmask & (1 << i)) {
+                                               vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
+                                               //flatshade[loc / 32] |= 1 << (loc % 32);
+                                               loc++;
+                                       }
+                               }
+                       }
+
+                       gl_varying_slot slot = s[FS].v->inputs[j].slot;
+
+                       /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+                       if (slot >= VARYING_SLOT_VAR0) {
+                               unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+                               /* Replace the .xy coordinates with S/T from the point sprite. Set
+                                * interpolation bits for .zw such that they become .01
+                                */
+                               if (emit->sprite_coord_enable & texmask) {
+                                       /* mask is two 2-bit fields, where:
+                                        *   '01' -> S
+                                        *   '10' -> T
+                                        *   '11' -> 1 - T  (flip mode)
+                                        */
+                                       unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
+                                       uint32_t loc = inloc;
+                                       if (compmask & 0x1) {
+                                               vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
+                                               loc++;
+                                       }
+                                       if (compmask & 0x2) {
+                                               vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
+                                               loc++;
+                                       }
+                                       if (compmask & 0x4) {
+                                               /* .z <- 0.0f */
+                                               vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
+                                               loc++;
+                                       }
+                                       if (compmask & 0x8) {
+                                               /* .w <- 1.0f */
+                                               vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
+                                               loc++;
+                                       }
+                               }
+                       }
+               }
+
+               OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
+               OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) |
+                               (s[VS].v->writes_psize ? 0x0c00 : 0xff00)); // XXX
+
+               OUT_PKT4(ring, REG_A5XX_VPC_VARYING_INTERP_MODE(0), 8);
+               for (i = 0; i < 8; i++)
+                       OUT_RING(ring, vinterp[i]);     /* VPC_VARYING_INTERP[i].MODE */
+
+               OUT_PKT4(ring, REG_A5XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+               for (i = 0; i < 8; i++)
+                       OUT_RING(ring, vpsrepl[i]);   /* VPC_VARYING_PS_REPL[i] */
+       }
+
+       if (!emit->key.binning_pass)
+               if (s[FS].instrlen)
+                       emit_shader(ring, s[FS].v);
+
+       OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_1, 5);
+       OUT_RING(ring, A5XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
+                       A5XX_VFD_CONTROL_1_REGID4INST(instance_regid) |
+                       0xfc);
+       OUT_RING(ring, 0x0000fcfc);   /* VFD_CONTROL_2 */
+       OUT_RING(ring, 0x0000fcfc);   /* VFD_CONTROL_3 */
+       OUT_RING(ring, 0x000000fc);   /* VFD_CONTROL_4 */
+       OUT_RING(ring, 0x00000000);   /* VFD_CONTROL_5 */
+}
+
+void
+fd5_prog_init(struct pipe_context *pctx)
+{
+       /* install the a5xx vertex/fragment shader state create+delete hooks: */
+       pctx->create_vs_state = fd5_vp_state_create;
+       pctx->delete_vs_state = fd5_vp_state_delete;
+       pctx->create_fs_state = fd5_fp_state_create;
+       pctx->delete_fs_state = fd5_fp_state_delete;
+       /* only then call fd_prog_init() on the same context: */
+       fd_prog_init(pctx);
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.h b/src/gallium/drivers/freedreno/a5xx/fd5_program.h
new file mode 100644 (file)
index 0000000..cd03bc5
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_PROGRAM_H_
+#define FD5_PROGRAM_H_
+
+#include "pipe/p_context.h"
+#include "freedreno_context.h"
+#include "ir3_shader.h"
+
+struct fd5_shader_stateobj {   /* a5xx shader state object */
+       struct ir3_shader *shader;   /* its only member: pointer to an ir3 shader */
+};
+
+struct fd5_emit;
+
+void fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit,
+               int nr, struct pipe_surface **bufs);
+
+void fd5_prog_init(struct pipe_context *pctx);
+
+#endif /* FD5_PROGRAM_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_query.c b/src/gallium/drivers/freedreno/a5xx/fd5_query.c
new file mode 100644 (file)
index 0000000..894c682
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "fd5_query.h"
+
+void fd5_query_context_init(struct pipe_context *pctx)
+{
+       /* TODO: stub -- nothing is installed on 'pctx' yet */
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_query.h b/src/gallium/drivers/freedreno/a5xx/fd5_query.h
new file mode 100644 (file)
index 0000000..2e563b0
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_QUERY_H_
+#define FD5_QUERY_H_
+
+#include "pipe/p_context.h"
+
+void fd5_query_context_init(struct pipe_context *pctx);
+
+#endif /* FD5_QUERY_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.c b/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.c
new file mode 100644 (file)
index 0000000..6741852
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd5_rasterizer.h"
+#include "fd5_context.h"
+#include "fd5_format.h"
+
+void *
+fd5_rasterizer_state_create(struct pipe_context *pctx,
+               const struct pipe_rasterizer_state *cso)
+{
+       /* Build the a5xx rasterizer state object: it keeps a copy of 'cso'
+        * plus the GRAS register values derived from it below.  Returns
+        * NULL if CALLOC_STRUCT() yields NULL.  'pctx' is not used here.
+        */
+       struct fd5_rasterizer_stateobj *so =
+                       CALLOC_STRUCT(fd5_rasterizer_stateobj);
+       float min_psize, max_psize;
+       uint32_t su_cntl;
+
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+
+       /* Without per-vertex point size, force min == max == point_size so
+        * the result is as if the vertex output was disabled:
+        */
+       min_psize = cso->point_size;
+       max_psize = cso->point_size;
+       if (cso->point_size_per_vertex) {
+               min_psize = util_get_min_point_size(cso);
+               max_psize = 4092;
+       }
+
+       /* point size and its min/max clamp: */
+       so->gras_su_point_minmax =
+                       A5XX_GRAS_SU_POINT_MINMAX_MIN(min_psize) |
+                       A5XX_GRAS_SU_POINT_MINMAX_MAX(max_psize);
+       so->gras_su_point_size = A5XX_GRAS_SU_POINT_SIZE(cso->point_size);
+
+       /* polygon offset parameters: */
+       so->gras_su_poly_offset_scale =
+                       A5XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
+       so->gras_su_poly_offset_offset =
+                       A5XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
+       so->gras_su_poly_offset_clamp =
+                       A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp);
+
+       /* line half-width, front-face winding and polygon-offset enable: */
+       su_cntl = A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width/2.0);
+       if (!cso->front_ccw)
+               su_cntl |= A5XX_GRAS_SU_CNTL_FRONT_CW;
+       if (cso->offset_tri)
+               su_cntl |= A5XX_GRAS_SU_CNTL_POLY_OFFSET;
+       so->gras_su_cntl = su_cntl;
+
+       /* ??? purpose of this bit is not known: */
+       so->gras_cl_clip_cntl = 0x80000;
+
+       /* TODO not handled yet (so->pc_prim_vtx_cntl and
+        * so->pc_prim_vtx_cntl2 are never written here):
+        *  - cso->fill_front/fill_back -> PC_PRIM_VTX_CNTL2 POLYMODE_*
+        *  - cso->cull_face -> A5XX_GRAS_SU_CNTL_CULL_FRONT/CULL_BACK
+        *  - !cso->flatshade_first -> PC_PRIM_VTX_CNTL PROVOKING_VTX_LAST
+        *  - !cso->depth_clip -> GRAS_CL_CLIP_CNTL ZNEAR/ZFAR_CLIP_DISABLE
+        *  - cso->clip_halfz -> GRAS_CL_CLIP_CNTL ZERO_GB_SCALE_Z
+        */
+
+       return so;
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.h b/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.h
new file mode 100644 (file)
index 0000000..1c8771f
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_RASTERIZER_H_
+#define FD5_RASTERIZER_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+struct fd5_rasterizer_stateobj {
+       struct pipe_rasterizer_state base;   /* must stay first: fd5_rasterizer_stateobj() casts a pointer to it */
+       /* register values computed by fd5_rasterizer_state_create(): */
+       uint32_t gras_su_point_minmax;
+       uint32_t gras_su_point_size;
+       uint32_t gras_su_poly_offset_scale;
+       uint32_t gras_su_poly_offset_offset;
+       uint32_t gras_su_poly_offset_clamp;
+
+       uint32_t gras_su_cntl;
+       uint32_t gras_cl_clip_cntl;
+       uint32_t pc_prim_vtx_cntl;    /* not written by the create function yet */
+       uint32_t pc_prim_vtx_cntl2;   /* not written by the create function yet */
+};
+
+static inline struct fd5_rasterizer_stateobj *
+fd5_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
+{
+       return (struct fd5_rasterizer_stateobj *)rast;   /* plain cast; 'base' is the struct's first member */
+}
+
+void * fd5_rasterizer_state_create(struct pipe_context *pctx,
+               const struct pipe_rasterizer_state *cso);
+
+#endif /* FD5_RASTERIZER_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_screen.c b/src/gallium/drivers/freedreno/a5xx/fd5_screen.c
new file mode 100644 (file)
index 0000000..96f83ed
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_screen.h"
+#include "util/u_format.h"
+
+#include "fd5_screen.h"
+#include "fd5_context.h"
+#include "fd5_format.h"
+#include "ir3_compiler.h"
+
+static boolean
+fd5_screen_is_format_supported(struct pipe_screen *pscreen,
+               enum pipe_format format,
+               enum pipe_texture_target target,
+               unsigned sample_count,
+               unsigned usage)
+{
+       /* Returns non-zero only if 'format' works for every binding requested
+        * in 'usage'.  Each requested binding whose format lookup does not
+        * yield ~0 is collected in 'supported'; 'pscreen' is not used.
+        */
+       const unsigned rt_binds = PIPE_BIND_RENDER_TARGET |
+                       PIPE_BIND_DISPLAY_TARGET |
+                       PIPE_BIND_SCANOUT |
+                       PIPE_BIND_SHARED;
+       unsigned supported = 0;
+
+       /* bad target, MSAA (TODO add MSAA), or rejected by util_format: */
+       if (target >= PIPE_MAX_TEXTURE_TYPES || sample_count > 1 ||
+                       !util_format_is_supported(format, usage)) {
+               DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+                               util_format_name(format), target, sample_count, usage);
+               return FALSE;
+       }
+
+       if (usage & PIPE_BIND_VERTEX_BUFFER) {
+               if (fd5_pipe2vtx(format) != (enum a5xx_vtx_fmt)~0)
+                       supported |= PIPE_BIND_VERTEX_BUFFER;
+       }
+
+       /* formats with blocksize 12 are only accepted for PIPE_BUFFER: */
+       if (usage & PIPE_BIND_SAMPLER_VIEW) {
+               if ((target == PIPE_BUFFER ||
+                               util_format_get_blocksize(format) != 12) &&
+                               fd5_pipe2tex(format) != (enum a5xx_tex_fmt)~0)
+                       supported |= PIPE_BIND_SAMPLER_VIEW;
+       }
+
+       /* these bindings need both a color and a texture format: */
+       if ((usage & rt_binds) &&
+                       fd5_pipe2color(format) != (enum a5xx_color_fmt)~0 &&
+                       fd5_pipe2tex(format) != (enum a5xx_tex_fmt)~0)
+               supported |= usage & rt_binds;
+
+       if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+                       fd5_pipe2depth(format) != (enum a5xx_depth_format)~0 &&
+                       fd5_pipe2tex(format) != (enum a5xx_tex_fmt)~0)
+               supported |= PIPE_BIND_DEPTH_STENCIL;
+
+       if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+                       fd_pipe2index(format) != (enum pc_di_index_size)~0)
+               supported |= PIPE_BIND_INDEX_BUFFER;
+
+       if (supported != usage) {
+               DBG("not supported: format=%s, target=%d, sample_count=%d, "
+                               "usage=%x, retval=%x", util_format_name(format),
+                               target, sample_count, usage, supported);
+       }
+       return supported == usage;
+}
+
+void
+fd5_screen_init(struct pipe_screen *pscreen)
+{
+       struct fd_screen *fdscreen = fd_screen(pscreen);
+       pscreen->is_format_supported = fd5_screen_is_format_supported;   /* a5xx pipe_screen hooks */
+       pscreen->context_create = fd5_context_create;
+       fdscreen->compiler = ir3_compiler_create(fdscreen->dev, fdscreen->gpu_id);   /* ir3 compiler for this dev/gpu_id */
+       fdscreen->max_rts = A5XX_MAX_RENDER_TARGETS;   /* render-target limit */
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_screen.h b/src/gallium/drivers/freedreno/a5xx/fd5_screen.h
new file mode 100644 (file)
index 0000000..ba0c7f1
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_SCREEN_H_
+#define FD5_SCREEN_H_
+
+#include "pipe/p_screen.h"
+
+void fd5_screen_init(struct pipe_screen *pscreen);
+
+#endif /* FD5_SCREEN_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_texture.c b/src/gallium/drivers/freedreno/a5xx/fd5_texture.c
new file mode 100644 (file)
index 0000000..a8604b7
--- /dev/null
@@ -0,0 +1,353 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "fd5_texture.h"
+#include "fd5_format.h"
+
+/* Translate a PIPE_TEX_WRAP_x mode into the a5xx sampler clamp mode.
+ *
+ * PIPE_TEX_WRAP_CLAMP is first rewritten to CLAMP_TO_EDGE or
+ * CLAMP_TO_BORDER depending on 'clamp_to_edge'.  '*needs_border' is set
+ * to true when the resulting mode is CLAMP_TO_BORDER and is otherwise
+ * left untouched.  Unhandled modes are logged and map to 0.
+ */
+static enum a5xx_tex_clamp
+tex_clamp(unsigned wrap, bool clamp_to_edge, bool *needs_border)
+{
+       /* Hardware does not support _CLAMP, pick the mode that emulates it: */
+       if (wrap == PIPE_TEX_WRAP_CLAMP) {
+               if (clamp_to_edge)
+                       wrap = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+               else
+                       wrap = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+       }
+
+       if (wrap == PIPE_TEX_WRAP_REPEAT)
+               return A5XX_TEX_REPEAT;
+
+       if (wrap == PIPE_TEX_WRAP_CLAMP_TO_EDGE)
+               return A5XX_TEX_CLAMP_TO_EDGE;
+
+       if (wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER) {
+               *needs_border = true;
+               return A5XX_TEX_CLAMP_TO_BORDER;
+       }
+
+       /* only works for PoT.. need to emulate otherwise! */
+       if (wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE)
+               return A5XX_TEX_MIRROR_CLAMP;
+
+       if (wrap == PIPE_TEX_WRAP_MIRROR_REPEAT)
+               return A5XX_TEX_MIRROR_REPEAT;
+
+       /* PIPE_TEX_WRAP_MIRROR_CLAMP and PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
+        * could perhaps be emulated, but currently
+        * PIPE_CAP_TEXTURE_MIRROR_CLAMP is just not advertised, so they are
+        * reported like any other unknown mode:
+        */
+       DBG("invalid wrap: %u", wrap);
+       return 0;
+}
+
+/* Translate a PIPE_TEX_FILTER_x value into the a5xx filter mode.  A linear
+ * filter becomes the anisotropic mode when 'aniso' is set.  Unknown
+ * filters are logged and map to 0.
+ */
+static enum a5xx_tex_filter
+tex_filter(unsigned filter, bool aniso)
+{
+       if (filter == PIPE_TEX_FILTER_NEAREST)
+               return A5XX_TEX_NEAREST;
+
+       if (filter == PIPE_TEX_FILTER_LINEAR) {
+               if (aniso)
+                       return A5XX_TEX_ANISO;
+               return A5XX_TEX_LINEAR;
+       }
+
+       DBG("invalid filter: %u", filter);
+       return 0;
+}
+
+/* Create the a5xx sampler state object for 'cso'.
+ *
+ * Keeps a copy of the gallium state in so->base and precomputes the
+ * texsamp0/texsamp1 values, the per-coordinate saturate flags used to
+ * emulate PIPE_TEX_WRAP_CLAMP, and needs_border.  Returns NULL if the
+ * allocation fails.
+ */
+static void *
+fd5_sampler_state_create(struct pipe_context *pctx,
+               const struct pipe_sampler_state *cso)
+{
+       struct fd5_sampler_stateobj *so = CALLOC_STRUCT(fd5_sampler_stateobj);
+       /* half of max_anisotropy, capped at 8, reduced with util_last_bit(): */
+       unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
+       bool miplinear = false;
+       bool clamp_to_edge;
+
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+
+       if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
+               miplinear = true;
+
+       /*
+        * For nearest filtering, _CLAMP means _CLAMP_TO_EDGE;  for linear
+        * filtering, _CLAMP means _CLAMP_TO_BORDER while additionally
+        * clamping the texture coordinates to [0.0, 1.0].
+        *
+        * The clamping will be taken care of in the shaders.  There are two
+        * filters here, but let the minification one have a say.
+        */
+       clamp_to_edge = (cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+       if (!clamp_to_edge) {
+               so->saturate_s = (cso->wrap_s == PIPE_TEX_WRAP_CLAMP);
+               so->saturate_t = (cso->wrap_t == PIPE_TEX_WRAP_CLAMP);
+               so->saturate_r = (cso->wrap_r == PIPE_TEX_WRAP_CLAMP);
+       }
+
+       /* tex_clamp() flips this to true if any coordinate ends up using
+        * CLAMP_TO_BORDER:
+        */
+       so->needs_border = false;
+       so->texsamp0 =
+               COND(miplinear, A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
+               A5XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
+               A5XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
+               A5XX_TEX_SAMP_0_ANISO(aniso) |
+               A5XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge, &so->needs_border)) |
+               A5XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge, &so->needs_border)) |
+               A5XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge, &so->needs_border));
+
+       so->texsamp1 =
+//             COND(miplinear, A5XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
+               COND(!cso->seamless_cube_map, A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
+               COND(!cso->normalized_coords, A5XX_TEX_SAMP_1_UNNORM_COORDS);
+
+       /* LOD bias and min/max LOD are only programmed when a mip filter
+        * is in use; otherwise those fields stay zero:
+        */
+       if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+               so->texsamp0 |= A5XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
+               so->texsamp1 |=
+                       A5XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
+                       A5XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
+       }
+
+       if (cso->compare_mode)
+               so->texsamp1 |= A5XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
+
+       return so;
+}
+
+/* Bind 'nr' sampler states for 'shader' through the core
+ * fd_sampler_states_bind(), and record for the fragment and vertex stages
+ * which of the passed samplers want their s/t/r coordinates saturated
+ * (bit i corresponds to hwcso[i]).  A NULL 'hwcso' is handled as nr == 0.
+ */
+static void
+fd5_sampler_states_bind(struct pipe_context *pctx,
+               enum pipe_shader_type shader, unsigned start,
+               unsigned nr, void **hwcso)
+{
+       struct fd5_context *fd5_ctx = fd5_context(fd_context(pctx));
+       uint16_t sat_s = 0, sat_t = 0, sat_r = 0;
+       unsigned idx;
+
+       if (!hwcso)
+               nr = 0;
+
+       for (idx = 0; idx < nr; idx++) {
+               struct fd5_sampler_stateobj *samp;
+
+               if (!hwcso[idx])
+                       continue;
+
+               samp = fd5_sampler_stateobj(hwcso[idx]);
+
+               if (samp->saturate_s)
+                       sat_s |= (1 << idx);
+               if (samp->saturate_t)
+                       sat_t |= (1 << idx);
+               if (samp->saturate_r)
+                       sat_r |= (1 << idx);
+       }
+
+       fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
+
+       switch (shader) {
+       case PIPE_SHADER_FRAGMENT:
+               fd5_ctx->fsaturate = (sat_s | sat_t | sat_r) != 0;
+               fd5_ctx->fsaturate_s = sat_s;
+               fd5_ctx->fsaturate_t = sat_t;
+               fd5_ctx->fsaturate_r = sat_r;
+               break;
+       case PIPE_SHADER_VERTEX:
+               fd5_ctx->vsaturate = (sat_s | sat_t | sat_r) != 0;
+               fd5_ctx->vsaturate_s = sat_s;
+               fd5_ctx->vsaturate_t = sat_t;
+               fd5_ctx->vsaturate_r = sat_r;
+               break;
+       default:
+               /* saturate masks are only tracked for the FS and VS stages */
+               break;
+       }
+}
+
+/* Map a gallium texture target onto the a5xx texture type.  Array targets
+ * share the type of their non-array counterpart, and buffers use the 1D
+ * type.
+ */
+static enum a5xx_tex_type
+tex_type(unsigned target)
+{
+       switch (target) {
+       case PIPE_TEXTURE_3D:
+               return A5XX_TEX_3D;
+       case PIPE_TEXTURE_CUBE:
+       case PIPE_TEXTURE_CUBE_ARRAY:
+               return A5XX_TEX_CUBE;
+       case PIPE_TEXTURE_RECT:
+       case PIPE_TEXTURE_2D:
+       case PIPE_TEXTURE_2D_ARRAY:
+               return A5XX_TEX_2D;
+       case PIPE_BUFFER:
+       case PIPE_TEXTURE_1D:
+       case PIPE_TEXTURE_1D_ARRAY:
+               return A5XX_TEX_1D;
+       default:
+               /* unknown target: trips the assert, and is treated as 1D
+                * when asserts are compiled out
+                */
+               assert(0);
+               return A5XX_TEX_1D;
+       }
+}
+
+/* True only when the screen reports gpu_id 420 and 'format' has the ASTC
+ * layout.  The format is not looked at for any other gpu_id.
+ */
+static bool
+use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format)
+{
+       if (fd_screen(pctx->screen)->gpu_id != 420)
+               return false;
+
+       return util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC;
+}
+
+/* Create an a5xx sampler view of 'prsc' described by the template 'cso'.
+ *
+ * Copies the template into so->base, takes a reference on the resource,
+ * and precomputes the texconst0/1/2/3/5 values plus the byte offset of
+ * the viewed data.  Returns NULL if the allocation fails.
+ */
+static struct pipe_sampler_view *
+fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
+               const struct pipe_sampler_view *cso)
+{
+       struct fd5_pipe_sampler_view *so = CALLOC_STRUCT(fd5_pipe_sampler_view);
+       struct fd_resource *rsc = fd_resource(prsc);
+       /* 'layers' is only assigned on the non-buffer path, and only read
+        * by the array/cube cases of the switch below:
+        */
+       unsigned lvl, layers;
+       uint32_t sz2 = 0;
+
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+       /* the view holds its own reference on the resource: */
+       pipe_reference(NULL, &prsc->reference);
+       so->base.texture = prsc;
+       so->base.reference.count = 1;
+       so->base.context = pctx;
+
+       so->texconst0 =
+               A5XX_TEX_CONST_0_FMT(fd5_pipe2tex(cso->format)) |
+               fd5_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
+                               cso->swizzle_b, cso->swizzle_a);
+
+       if (util_format_is_srgb(cso->format)) {
+               if (use_astc_srgb_workaround(pctx, cso->format))
+                       so->astc_srgb = true;
+               so->texconst0 |= A5XX_TEX_CONST_0_SRGB;
+       }
+
+       if (cso->target == PIPE_BUFFER) {
+               /* buffer view: width is the number of format-sized elements
+                * in the viewed range, height is fixed at 1
+                */
+               unsigned elements = cso->u.buf.size / util_format_get_blocksize(cso->format);
+
+               lvl = 0;
+               so->texconst1 =
+                       A5XX_TEX_CONST_1_WIDTH(elements) |
+                       A5XX_TEX_CONST_1_HEIGHT(1);
+               so->texconst2 =
+                       A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(cso->format)) |
+                       A5XX_TEX_CONST_2_PITCH(elements * rsc->cpp);
+               so->offset = cso->u.buf.offset;
+       } else {
+//             unsigned miplevels;
+
+               lvl = fd_sampler_first_level(cso);
+//             miplevels = fd_sampler_last_level(cso) - lvl;
+               layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
+
+//             so->texconst0 |= A5XX_TEX_CONST_0_MIPLVLS(miplevels);
+               /* dimensions are those of the first viewed mip level: */
+               so->texconst1 =
+                       A5XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+                       A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+               so->texconst2 =
+                       A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(cso->format)) |
+                       A5XX_TEX_CONST_2_PITCH(
+                                       util_format_get_nblocksx(
+                                                       cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
+               so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
+       }
+
+       so->texconst2 |= A5XX_TEX_CONST_2_TYPE(tex_type(cso->target));
+
+       switch (cso->target) {
+       case PIPE_TEXTURE_1D:
+       case PIPE_TEXTURE_2D:
+               so->texconst3 =
+                       A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->slices[lvl].size0);
+               so->texconst5 =
+                       A5XX_TEX_CONST_5_DEPTH(1);
+               break;
+       case PIPE_TEXTURE_1D_ARRAY:
+       case PIPE_TEXTURE_2D_ARRAY:
+               so->texconst3 =
+                       A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size);
+               so->texconst5 =
+                       A5XX_TEX_CONST_5_DEPTH(layers);
+               break;
+       case PIPE_TEXTURE_CUBE:
+       case PIPE_TEXTURE_CUBE_ARRAY:
+               /* depth is expressed in cubes, ie. layers divided by 6 */
+               so->texconst3 =
+                       A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size);
+               so->texconst5 =
+                       A5XX_TEX_CONST_5_DEPTH(layers / 6);
+               break;
+       case PIPE_TEXTURE_3D:
+               /* advance 'lvl' towards last_level, stopping early once the
+                * next level's size0 equals the size0 recorded for the
+                * previous step; the array pitch and depth below then use
+                * that advanced level.
+                * NOTE(review): the hardware reason for picking this level
+                * is not visible here - confirm before changing.
+                */
+               while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0)
+                       sz2 = rsc->slices[++lvl].size0;
+               so->texconst3 =
+                       A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->slices[lvl].size0);
+               so->texconst5 =
+                       A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl));
+               break;
+       default:
+               /* remaining targets (eg. PIPE_BUFFER, PIPE_TEXTURE_RECT):
+                * texconst5 keeps its zero value from CALLOC_STRUCT
+                */
+               so->texconst3 = 0x00000000;
+               break;
+       }
+
+       return &so->base;
+}
+
+/* Bind 'nr' sampler views for 'shader' through the core
+ * fd_set_sampler_views(), and record for the fragment and vertex stages
+ * which of the passed views have astc_srgb set (bit i corresponds to
+ * views[i]).
+ *
+ * 'views' may be NULL; in that case no view is inspected and the mask is
+ * zero.  'nr' is deliberately passed on unchanged so the core code still
+ * processes all nr slots.
+ */
+static void
+fd5_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
+               unsigned start, unsigned nr,
+               struct pipe_sampler_view **views)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd5_context *fd5_ctx = fd5_context(ctx);
+       uint16_t astc_srgb = 0;
+       unsigned i;
+
+       for (i = 0; i < nr; i++) {
+               /* guard against a NULL array, like the !hwcso check in
+                * fd5_sampler_states_bind():
+                */
+               if (views && views[i]) {
+                       struct fd5_pipe_sampler_view *view =
+                                       fd5_pipe_sampler_view(views[i]);
+                       if (view->astc_srgb)
+                               astc_srgb |= (1 << i);
+               }
+       }
+
+       fd_set_sampler_views(pctx, shader, start, nr, views);
+
+       if (shader == PIPE_SHADER_FRAGMENT) {
+               fd5_ctx->fastc_srgb = astc_srgb;
+       } else if (shader == PIPE_SHADER_VERTEX) {
+               fd5_ctx->vastc_srgb = astc_srgb;
+       }
+}
+
+/* Install the a5xx sampler state and sampler view callbacks on 'pctx'. */
+void
+fd5_texture_init(struct pipe_context *pctx)
+{
+       /* sampler views: */
+       pctx->create_sampler_view = fd5_sampler_view_create;
+       pctx->set_sampler_views = fd5_set_sampler_views;
+
+       /* sampler states: */
+       pctx->create_sampler_state = fd5_sampler_state_create;
+       pctx->bind_sampler_states = fd5_sampler_states_bind;
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_texture.h b/src/gallium/drivers/freedreno/a5xx/fd5_texture.h
new file mode 100644 (file)
index 0000000..c4d1093
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_TEXTURE_H_
+#define FD5_TEXTURE_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_texture.h"
+#include "freedreno_resource.h"
+
+#include "fd5_context.h"
+#include "fd5_format.h"
+
+/* a5xx sampler state object: the gallium state plus values derived from
+ * it at create time.
+ */
+struct fd5_sampler_stateobj {
+       /* must stay the first member, fd5_sampler_stateobj() casts to it */
+       struct pipe_sampler_state base;
+       /* precomputed sampler state dwords: */
+       uint32_t texsamp0;
+       uint32_t texsamp1;
+       uint32_t texsamp2;
+       uint32_t texsamp3;
+       /* per-coordinate flags for emulating PIPE_TEX_WRAP_CLAMP: */
+       bool saturate_s;
+       bool saturate_t;
+       bool saturate_r;
+       /* set when any coordinate uses CLAMP_TO_BORDER: */
+       bool needs_border;
+};
+
+/* Downcast from the gallium sampler state to the a5xx state object. */
+static inline struct fd5_sampler_stateobj *
+fd5_sampler_stateobj(struct pipe_sampler_state *samp)
+{
+       struct fd5_sampler_stateobj *so = (struct fd5_sampler_stateobj *)samp;
+       return so;
+}
+
+/* a5xx sampler view: the gallium view plus values derived from it at
+ * create time.
+ */
+struct fd5_pipe_sampler_view {
+       /* must stay the first member, fd5_pipe_sampler_view() casts to it */
+       struct pipe_sampler_view base;
+       /* precomputed texture state dwords (there is no texconst4 member): */
+       uint32_t texconst0;
+       uint32_t texconst1;
+       uint32_t texconst2;
+       uint32_t texconst3;
+       uint32_t texconst5;
+       uint32_t texconst6;
+       uint32_t texconst7;
+       uint32_t texconst8;
+       uint32_t texconst9;
+       uint32_t texconst10;
+       uint32_t texconst11;
+       /* offset of the viewed data within the resource: */
+       uint32_t offset;
+       /* set for sRGB views that need the ASTC workaround: */
+       bool astc_srgb;
+};
+
+/* Downcast from the gallium sampler view to the a5xx view. */
+static inline struct fd5_pipe_sampler_view *
+fd5_pipe_sampler_view(struct pipe_sampler_view *pview)
+{
+       struct fd5_pipe_sampler_view *view = (struct fd5_pipe_sampler_view *)pview;
+       return view;
+}
+
+unsigned fd5_get_const_idx(struct fd_context *ctx,
+               struct fd_texture_stateobj *tex, unsigned samp_id);
+
+void fd5_texture_init(struct pipe_context *pctx);
+
+#endif /* FD5_TEXTURE_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c
new file mode 100644 (file)
index 0000000..f113a92
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd5_zsa.h"
+#include "fd5_context.h"
+#include "fd5_format.h"
+
+/* Create the a5xx depth/stencil/alpha state object for 'cso'.
+ *
+ * Keeps a copy of the gallium state in so->base and precomputes the
+ * depth, stencil and alpha-test register values.  Returns NULL if the
+ * allocation fails.
+ */
+void *
+fd5_zsa_state_create(struct pipe_context *pctx,
+               const struct pipe_depth_stencil_alpha_state *cso)
+{
+       struct fd5_zsa_stateobj *so;
+
+       so = CALLOC_STRUCT(fd5_zsa_stateobj);
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+
+       /* the compare func is programmed whether or not depth is enabled: */
+       so->rb_depth_cntl |=
+               A5XX_RB_DEPTH_CNTL_ZFUNC(cso->depth.func); /* maps 1:1 */
+
+       if (cso->depth.enabled)
+               so->rb_depth_cntl |=
+                       A5XX_RB_DEPTH_CNTL_Z_ENABLE |
+                       A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
+
+       if (cso->depth.writemask)
+               so->rb_depth_cntl |= A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
+
+       if (cso->stencil[0].enabled) {
+               const struct pipe_stencil_state *s = &cso->stencil[0];
+
+               so->rb_stencil_control |=
+                       A5XX_RB_STENCIL_CONTROL_STENCIL_READ |
+                       A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+                       A5XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
+                       A5XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
+                       A5XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
+                       A5XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
+               so->rb_stencilrefmask |=
+                       A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
+                       A5XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
+
+               /* back-face state (stencil[1]) is only looked at when the
+                * front-face stencil is enabled:
+                */
+               if (cso->stencil[1].enabled) {
+                       const struct pipe_stencil_state *bs = &cso->stencil[1];
+
+                       so->rb_stencil_control |=
+                               A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+                               A5XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
+                               A5XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
+                               A5XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
+                               A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
+                       /* back-face write/value masks are not programmed yet: */
+//                     so->rb_stencilrefmask_bf |=
+//                             A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
+//                             A5XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
+               }
+       }
+
+       if (cso->alpha.enabled) {
+               /* ref_value scaled by 255 and truncated to an integer: */
+               uint32_t ref = cso->alpha.ref_value * 255.0;
+               so->gras_su_depth_plane_cntl =
+                       A5XX_GRAS_SU_DEPTH_PLANE_CNTL_ALPHA_TEST_ENABLE;
+               so->rb_alpha_control =
+                       A5XX_RB_ALPHA_CONTROL_ALPHA_TEST |
+                       A5XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
+                       A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func);
+//             so->rb_depth_control |=
+//                     A5XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+       }
+
+       return so;
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h
new file mode 100644 (file)
index 0000000..02c116a
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_ZSA_H_
+#define FD5_ZSA_H_
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+#include "freedreno_util.h"
+
+/* a5xx depth/stencil/alpha state object: the gallium state plus the
+ * register values precomputed by fd5_zsa_state_create().
+ */
+struct fd5_zsa_stateobj {
+       struct pipe_depth_stencil_alpha_state base; /* must stay first, see cast below */
+
+       uint32_t gras_su_depth_plane_cntl; /* only set when alpha test is enabled */
+       uint32_t rb_alpha_control;         /* only set when alpha test is enabled */
+       uint32_t rb_depth_cntl;
+       uint32_t rb_stencil_control;
+       uint32_t rb_stencilrefmask;
+};
+
+/* Downcast from the gallium zsa state to the a5xx state object. */
+static inline struct fd5_zsa_stateobj *
+fd5_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
+{
+       return (struct fd5_zsa_stateobj *)zsa;
+}
+
+void * fd5_zsa_state_create(struct pipe_context *pctx,
+               const struct pipe_depth_stencil_alpha_state *cso);
+
+#endif /* FD5_ZSA_H_ */
index 276f6be..ec6f1cd 100644 (file)
@@ -403,3 +403,18 @@ fd_batch_check_size(struct fd_batch *batch)
                        (fd_mesa_debug & FD_DBG_FLUSH))
                fd_batch_flush(batch, true);
 }
+
+/* Emit a WAIT_FOR_IDLE into 'ring', but only when the batch is flagged as
+ * needing one (ie. there has not already been one since last draw), and
+ * clear the flag afterwards.  gpu_id 500 and up get the OUT_WFI5()
+ * flavour, everything older gets OUT_WFI().
+ */
+void
+fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring)
+{
+       if (!batch->needs_wfi)
+               return;
+
+       if (batch->ctx->screen->gpu_id < 500)
+               OUT_WFI(ring);
+       else
+               OUT_WFI5(ring);
+
+       batch->needs_wfi = false;
+}
index aeeb9c5..1e95459 100644 (file)
@@ -261,17 +261,7 @@ fd_reset_wfi(struct fd_batch *batch)
        batch->needs_wfi = true;
 }
 
-/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already
- * been one since last draw:
- */
-static inline void
-fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring)
-{
-       if (batch->needs_wfi) {
-               OUT_WFI(ring);
-               batch->needs_wfi = false;
-       }
-}
+void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring);
 
 /* emit a CP_EVENT_WRITE:
  */
index c4c08a6..e56fef9 100644 (file)
@@ -257,6 +257,7 @@ struct fd_context {
        void (*emit_tile_mem2gmem)(struct fd_batch *batch, struct fd_tile *tile);
        void (*emit_tile_renderprep)(struct fd_batch *batch, struct fd_tile *tile);
        void (*emit_tile_gmem2mem)(struct fd_batch *batch, struct fd_tile *tile);
+       void (*emit_tile_fini)(struct fd_batch *batch);   /* optional */
 
        /* optional, for GMEM bypass: */
        void (*emit_sysmem_prep)(struct fd_batch *batch);
index b94e33d..3656538 100644 (file)
@@ -69,7 +69,7 @@
 
 static uint32_t bin_width(struct fd_screen *screen)
 {
-       if (is_a4xx(screen))
+       if (is_a4xx(screen) || is_a5xx(screen))
                return 1024;
        if (is_a3xx(screen))
                return 992;
@@ -339,6 +339,9 @@ render_tiles(struct fd_batch *batch)
                /* emit gmem2mem to transfer tile back to system memory: */
                ctx->emit_tile_gmem2mem(batch, tile);
        }
+
+       if (ctx->emit_tile_fini)
+               ctx->emit_tile_fini(batch);
 }
 
 static void
index f6ec576..48b7964 100644 (file)
@@ -53,6 +53,7 @@
 #include "a2xx/fd2_screen.h"
 #include "a3xx/fd3_screen.h"
 #include "a4xx/fd4_screen.h"
+#include "a5xx/fd5_screen.h"
 
 #include "ir3/ir3_nir.h"
 
@@ -667,6 +668,9 @@ fd_screen_create(struct fd_device *dev)
        case 430:
                fd4_screen_init(pscreen);
                break;
+       case 530:
+               fd5_screen_init(pscreen);
+               break;
        default:
                debug_printf("unsupported GPU: a%03d\n", screen->gpu_id);
                goto fail;
index 3fc66fb..6a7b2a8 100644 (file)
@@ -114,6 +114,12 @@ is_a4xx(struct fd_screen *screen)
        return (screen->gpu_id >= 400) && (screen->gpu_id < 500);
 }
 
+/* true for gpu_id values in the range [500, 600) */
+static inline boolean
+is_a5xx(struct fd_screen *screen)
+{
+       return !((screen->gpu_id < 500) || (screen->gpu_id >= 600));
+}
+
 /* is it using the ir3 compiler (shader isa introduced with a3xx)? */
 static inline boolean
 is_ir3(struct fd_screen *screen)
index 3009700..a2d1358 100644 (file)
@@ -57,8 +57,9 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
 #define A2XX_MAX_RENDER_TARGETS 1
 #define A3XX_MAX_RENDER_TARGETS 4
 #define A4XX_MAX_RENDER_TARGETS 8
+#define A5XX_MAX_RENDER_TARGETS 8
 
-#define MAX_RENDER_TARGETS A4XX_MAX_RENDER_TARGETS
+#define MAX_RENDER_TARGETS A5XX_MAX_RENDER_TARGETS
 
 #define FD_DBG_MSGS     0x0001
 #define FD_DBG_DISASM   0x0002
@@ -176,6 +177,7 @@ fd_half_precision(struct pipe_framebuffer_state *pfb)
 #define LOG_DWORDS 0
 
 static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx);
+static inline void emit_marker5(struct fd_ringbuffer *ring, int scratch_idx);
 
 static inline void
 OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
@@ -202,39 +204,45 @@ OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data,
        }));
 }
 
+/*
+ * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+
+ */
+
 static inline void
 OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
-               uint32_t offset, uint32_t or, int32_t shift)
+               uint32_t offset, uint64_t or, int32_t shift)
 {
        if (LOG_DWORDS) {
                DBG("ring[%p]: OUT_RELOC   %04x:  %p+%u << %d", ring,
                                (uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
        }
        debug_assert(offset < fd_bo_size(bo));
-       fd_ringbuffer_reloc(ring, &(struct fd_reloc){
+       fd_ringbuffer_reloc2(ring, &(struct fd_reloc){
                .bo = bo,
                .flags = FD_RELOC_READ,
                .offset = offset,
                .or = or,
                .shift = shift,
+               .orhi = or >> 32,
        });
 }
 
 static inline void
 OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
-               uint32_t offset, uint32_t or, int32_t shift)
+               uint32_t offset, uint64_t or, int32_t shift)
 {
        if (LOG_DWORDS) {
                DBG("ring[%p]: OUT_RELOCW  %04x:  %p+%u << %d", ring,
                                (uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
        }
        debug_assert(offset < fd_bo_size(bo));
-       fd_ringbuffer_reloc(ring, &(struct fd_reloc){
+       fd_ringbuffer_reloc2(ring, &(struct fd_reloc){
                .bo = bo,
                .flags = FD_RELOC_READ | FD_RELOC_WRITE,
                .offset = offset,
                .or = or,
                .shift = shift,
+               .orhi = or >> 32,
        });
 }
 
@@ -244,9 +252,18 @@ static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
                fd_ringbuffer_grow(ring, ndwords);
 }
 
+/* Query the FD_GPU_ID param of the pipe that 'ring' belongs to.  Used by
+ * the debug asserts that keep pkt0/pkt2/pkt3 emitters off a5xx+.
+ *
+ * Returns 0 if the query fails, rather than an indeterminate value.
+ */
+static inline uint32_t
+__gpu_id(struct fd_ringbuffer *ring)
+{
+       /* pre-initialized so a failing fd_pipe_get_param(), which may
+        * leave the output untouched, cannot make us return garbage:
+        */
+       uint64_t val = 0;
+       fd_pipe_get_param(ring->pipe, FD_GPU_ID, &val);
+       return val;
+}
+
 static inline void
 OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
 {
+       debug_assert(__gpu_id(ring) < 500);
        BEGIN_RING(ring, cnt+1);
        OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
 }
@@ -254,6 +271,7 @@ OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
 static inline void
 OUT_PKT2(struct fd_ringbuffer *ring)
 {
+       debug_assert(__gpu_id(ring) < 500);
        BEGIN_RING(ring, 1);
        OUT_RING(ring, CP_TYPE2_PKT);
 }
@@ -261,10 +279,48 @@ OUT_PKT2(struct fd_ringbuffer *ring)
 static inline void
 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
 {
+       debug_assert(__gpu_id(ring) < 500);
        BEGIN_RING(ring, cnt+1);
        OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
 }
 
+/*
+ * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
+ */
+
+/* Return the bit that has to accompany 'val' to give the pair odd parity:
+ * 1 when 'val' has an even number of bits set, 0 when it has an odd
+ * number set.
+ */
+static inline unsigned
+_odd_parity_bit(unsigned val)
+{
+       /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
+        * note that we want odd parity so the 0x6996 lookup word is inverted.
+        * The inverted word is spelled out as the unsigned 16 bit constant
+        * 0x9669 (== ~0x6996 & 0xffff), since right-shifting the negative
+        * int that ~0x6996 evaluates to is implementation-defined.
+        */
+       val ^= val >> 16;
+       val ^= val >> 8;
+       val ^= val >> 4;
+       val &= 0xf;
+       return (0x9669u >> val) & 1;
+}
+
+/* Emit a type-4 packet header for 'cnt' dwords starting at register
+ * 'regindx', after making sure the ring has room for the header plus its
+ * cnt payload dwords.
+ */
+static inline void
+OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
+{
+       uint32_t hdr = CP_TYPE4_PKT;
+
+       /* dword count in the low bits, its odd-parity bit at bit 7: */
+       hdr |= cnt;
+       hdr |= _odd_parity_bit(cnt) << 7;
+       /* register index from bit 8 up, its odd-parity bit at bit 27: */
+       hdr |= (regindx & 0x3ffff) << 8;
+       hdr |= _odd_parity_bit(regindx) << 27;
+
+       BEGIN_RING(ring, cnt+1);
+       OUT_RING(ring, hdr);
+}
+
+/* Emit a type-7 packet header for 'opcode' with 'cnt' payload dwords,
+ * after making sure the ring has room for the header plus its payload.
+ */
+static inline void
+OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
+{
+       uint32_t hdr = CP_TYPE7_PKT;
+
+       /* dword count in the low bits, its odd-parity bit at bit 15: */
+       hdr |= cnt;
+       hdr |= _odd_parity_bit(cnt) << 15;
+       /* 7 bit opcode from bit 16 up, its odd-parity bit at bit 23: */
+       hdr |= (opcode & 0x7f) << 16;
+       hdr |= _odd_parity_bit(opcode) << 23;
+
+       BEGIN_RING(ring, cnt+1);
+       OUT_RING(ring, hdr);
+}
+
 static inline void
 OUT_WFI(struct fd_ringbuffer *ring)
 {
@@ -273,10 +329,18 @@ OUT_WFI(struct fd_ringbuffer *ring)
 }
 
 static inline void
+OUT_WFI5(struct fd_ringbuffer *ring)
+{
+       OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);  /* type-7 CP_WAIT_FOR_IDLE header with no payload dwords */
+}
+
+static inline void
 __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target)
 {
        unsigned count = fd_ringbuffer_cmd_count(target);
 
+       debug_assert(__gpu_id(ring) < 500);
+
        /* for debug after a lock up, write a unique counter value
         * to scratch6 for each IB, to make it easier to match up
         * register dumps to cmdstream.  The combination of IB and
@@ -297,7 +361,34 @@ __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target
        emit_marker(ring, 6);
 }
 
+static inline void
+__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
+{
+       const unsigned nr_cmds = fd_ringbuffer_cmd_count(target);
+
+       /* Bracket the IB packets with a unique counter value written to
+        * scratch6.  After a lockup this makes it easier to match
+        * register dumps to cmdstream: the combination of IB and DRAW
+        * (scratch7) markers is enough to "triangulate" the particular
+        * draw that caused the lockup.
+        */
+       emit_marker5(ring, 6);
+
+       for (unsigned idx = 0; idx != nr_cmds; idx++) {
+               uint32_t sizedwords;
+               OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
+               sizedwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, idx) / 4;
+               assert(sizedwords > 0);
+               OUT_RING(ring, sizedwords);
+       }
+
+       emit_marker5(ring, 6);
+}
+
 /* CP_SCRATCH_REG4 is used to hold base address for query results: */
+// XXX annoyingly, the scratch regs move on a5xx, and they are also
+// written with different packet types, so freedreno_query_hw is going
+// to need a bit of rework.
 #define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4
 
 static inline void
@@ -312,6 +403,21 @@ emit_marker(struct fd_ringbuffer *ring, int scratch_idx)
        OUT_RING(ring, ++marker_cnt);
 }
 
+static inline void
+emit_marker5(struct fd_ringbuffer *ring, int scratch_idx)
+{
+       /* XXX hardcoded offset; should become REG_A5XX_CP_SCRATCH_REG(scratch_idx) */
+       const unsigned scratch_reg = 0x00000b78 + scratch_idx;
+       extern unsigned marker_cnt;
+       assert(scratch_reg != HW_QUERY_BASE_REG);  /* never write the query base reg */
+       if (scratch_reg != HW_QUERY_BASE_REG) {
+               OUT_WFI5(ring);
+               OUT_PKT4(ring, scratch_reg, 1);
+               OUT_RING(ring, ++marker_cnt);
+               OUT_WFI5(ring);
+       }
+}
+
 /* helper to get numeric value from environment variable..  mostly
  * just leaving this here because it is helpful to brute-force figure
  * out unknown formats, etc, which blob driver does not support: