turnip: Add debug option to find usage of stale reg values
authorDanylo Piliaiev <dpiliaiev@igalia.com>
Mon, 6 Feb 2023 19:50:41 +0000 (20:50 +0100)
committerMarge Bot <emma+marge@anholt.net>
Thu, 16 Feb 2023 17:43:10 +0000 (17:43 +0000)
MESA_VK_ABORT_ON_DEVICE_LOSS=1 \
TU_DEBUG_STALE_REGS_RANGE=0x00000c00,0x0000be01 \
TU_DEBUG_STALE_REGS_FLAGS=cmdbuf,renderpass \
./app

To pinpoint the reg causing a failure, bisect by narrowing the reg range.
Some failures may be caused by a combination of several regs; in that case
set the 'inverse' flag, which changes the meaning of the reg range to
"do not stomp these regs".

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21226>

src/freedreno/common/freedreno_stompable_regs.h [new file with mode: 0644]
src/freedreno/vulkan/tu_clear_blit.c
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_device.c
src/freedreno/vulkan/tu_device.h

diff --git a/src/freedreno/common/freedreno_stompable_regs.h b/src/freedreno/common/freedreno_stompable_regs.h
new file mode 100644 (file)
index 0000000..f0ddd03
--- /dev/null
@@ -0,0 +1,212 @@
+/*
+ * Copyright © 2023 Igalia S.L.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef __FREEDRENO_STOMPABLE_REGS_H__
+#define __FREEDRENO_STOMPABLE_REGS_H__
+
+#include <stdint.h>
+
+#include "a6xx.xml.h"
+#include "adreno_common.xml.h"
+#include "adreno_pm4.xml.h"
+
+/* In order to debug issues with usage of stale reg data we need to have
+ * a list of regs which we are allowed to stomp.
+ * The regs we are NOT allowed to stomp are:
+ * - Write protected;
+ * - Written by kernel but are not write protected;
+ * - Some regs that are not written by anyone but do affect the result.
+ *
+ * In addition, some regs are only emitted during cmdbuf setup
+ * so we have to have an additional filter to get a reduced list of regs
+ * stompable before each renderpass/blit.
+ */
+
+struct fd_stompable_reg_range {
+   uint16_t start_reg; /* first reg of the range (inclusive) */
+   uint16_t end_reg; /* last reg of the range (inclusive); equals start_reg for a single-reg range */
+};
+
+static const struct fd_stompable_reg_range
+   a6xx_fd_cmdbuf_stompable_reg_ranges[] = { /* Each entry is an inclusive
+      * {start_reg, end_reg} pair; tu_init_dbg_reg_stomper() walks every reg
+      * of every entry when it builds its stomp command streams.
+      */
+      {REG_A6XX_VSC_BIN_SIZE, REG_A6XX_VSC_DRAW_STRM_SIZE(31)},
+      {REG_A6XX_UCHE_UNKNOWN_0E12, REG_A6XX_UCHE_UNKNOWN_0E12},
+      {REG_A6XX_GRAS_CL_CNTL, REG_A6XX_GRAS_LRZ_DEPTH_VIEW},
+      {REG_A6XX_GRAS_2D_BLIT_CNTL, REG_A6XX_GRAS_2D_RESOLVE_CNTL_2},
+      {REG_A6XX_RB_BIN_CONTROL, REG_A6XX_RB_SAMPLE_LOCATION_1},
+      {REG_A6XX_RB_RENDER_CONTROL0, REG_A6XX_RB_UNKNOWN_8811},
+      {REG_A6XX_RB_UNKNOWN_8818, REG_A6XX_RB_UNKNOWN_881E},
+      {REG_A6XX_RB_MRT(0), REG_A6XX_RB_BLEND_CNTL},
+      {REG_A6XX_RB_DEPTH_PLANE_CNTL, REG_A6XX_RB_Z_BOUNDS_MAX},
+      {REG_A6XX_RB_STENCIL_CONTROL, REG_A6XX_RB_STENCILWRMASK},
+      {REG_A6XX_RB_WINDOW_OFFSET, REG_A6XX_RB_SAMPLE_COUNT_CONTROL},
+      {REG_A6XX_RB_LRZ_CNTL, REG_A6XX_RB_LRZ_CNTL},
+      {REG_A6XX_RB_Z_CLAMP_MIN, REG_A6XX_RB_Z_CLAMP_MAX},
+      {REG_A6XX_RB_UNKNOWN_88D0, REG_A6XX_RB_BLIT_SCISSOR_BR},
+      {REG_A6XX_RB_BIN_CONTROL2, REG_A6XX_RB_BLIT_INFO},
+      {REG_A6XX_RB_UNKNOWN_88F0, REG_A6XX_RB_UNKNOWN_88F4},
+      {REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, REG_A6XX_RB_MRT_FLAG_BUFFER(7)},
+      {REG_A6XX_RB_SAMPLE_COUNT_ADDR, REG_A6XX_RB_SAMPLE_COUNT_ADDR},
+      {REG_A6XX_RB_2D_DST_INFO, REG_A6XX_RB_2D_SRC_SOLID_C3},
+      {REG_A6XX_RB_DBG_ECO_CNTL, REG_A6XX_RB_ADDR_MODE_CNTL},
+      {REG_A6XX_RB_CCU_CNTL, REG_A6XX_RB_CCU_CNTL},
+      {REG_A6XX_VPC_GS_PARAM, REG_A6XX_VPC_POLYGON_MODE},
+      {REG_A6XX_VPC_VARYING_INTERP(0), REG_A6XX_VPC_POINT_COORD_INVERT},
+      {REG_A6XX_VPC_UNKNOWN_9300, REG_A6XX_VPC_SO_DISABLE},
+      {REG_A6XX_VPC_DBG_ECO_CNTL, REG_A6XX_VPC_PERFCTR_VPC_SEL(5)},
+      {REG_A6XX_PC_TESS_NUM_VERTEX, REG_A6XX_PC_DGEN_SU_CONSERVATIVE_RAS_CNTL},
+      {REG_A6XX_PC_POLYGON_MODE, REG_A6XX_PC_RASTER_CNTL},
+      {REG_A6XX_PC_PRIMITIVE_CNTL_0, REG_A6XX_PC_MULTIVIEW_MASK},
+      {REG_A6XX_PC_DRAW_INDX_BASE, REG_A6XX_PC_TESSFACTOR_ADDR},
+      {REG_A6XX_PC_VSTREAM_CONTROL, REG_A6XX_PC_BIN_DRAW_STRM},
+      {REG_A6XX_PC_VISIBILITY_OVERRIDE, REG_A6XX_PC_VISIBILITY_OVERRIDE},
+      {REG_A6XX_VFD_CONTROL_0, REG_A6XX_VFD_DEST_CNTL(31)},
+      {REG_A6XX_VFD_POWER_CNTL, REG_A6XX_VFD_POWER_CNTL},
+      {REG_A6XX_SP_VS_CTRL_REG0, REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET},
+      {REG_A6XX_SP_HS_CTRL_REG0, REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET},
+      {REG_A6XX_SP_DS_CTRL_REG0, REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET},
+      {REG_A6XX_SP_GS_CTRL_REG0, REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET},
+      {REG_A6XX_SP_VS_TEX_SAMP, REG_A6XX_SP_GS_TEX_CONST},
+      {REG_A6XX_SP_FS_CTRL_REG0, REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET},
+      {REG_A6XX_SP_CS_CTRL_REG0, REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET},
+      {REG_A6XX_SP_CS_CNTL_0, REG_A6XX_SP_CS_CNTL_1},
+      {REG_A6XX_SP_FS_TEX_SAMP, REG_A6XX_SP_CS_TEX_CONST},
+      {REG_A6XX_SP_CS_IBO, REG_A6XX_SP_CS_IBO},
+      {REG_A6XX_SP_CS_IBO_COUNT, REG_A6XX_SP_CS_IBO_COUNT},
+      {REG_A6XX_SP_MODE_CONTROL, REG_A6XX_SP_BINDLESS_BASE(0)},
+      {REG_A6XX_SP_IBO, REG_A6XX_SP_IBO_COUNT},
+      {REG_A6XX_SP_CHICKEN_BITS, REG_A6XX_SP_FLOAT_CNTL},
+      {REG_A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR, REG_A6XX_SP_UNKNOWN_B183},
+      {REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR, REG_A6XX_SP_TP_MODE_CNTL},
+      {REG_A6XX_SP_PS_2D_SRC_INFO, REG_A6XX_SP_WINDOW_OFFSET},
+      {REG_A6XX_TPL1_DBG_ECO_CNTL, REG_A6XX_TPL1_DBG_ECO_CNTL},
+      {REG_A6XX_HLSQ_VS_CNTL, REG_A6XX_HLSQ_GS_CNTL},
+      {REG_A6XX_HLSQ_FS_CNTL_0, REG_A6XX_HLSQ_CS_CNTL},
+      {REG_A6XX_HLSQ_CS_NDRANGE_0, REG_A6XX_HLSQ_CS_KERNEL_GROUP_Z},
+      {REG_A6XX_HLSQ_CS_BINDLESS_BASE(0), REG_A6XX_HLSQ_CS_BINDLESS_BASE(0)},
+      {REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0},
+      {REG_A6XX_HLSQ_FS_CNTL, REG_A6XX_HLSQ_SHARED_CONSTS},
+      {REG_A6XX_HLSQ_BINDLESS_BASE(0), REG_A6XX_HLSQ_BINDLESS_BASE(0)},
+      {REG_A6XX_HLSQ_UNKNOWN_BE00, REG_A6XX_HLSQ_UNKNOWN_BE01},
+};
+
+/* Return true if it is expected that reg is overwritten by a renderpass or
+ * not used by anything in a renderpass. So it's safe to stomp the reg
+ * beforehand.
+ *
+ * turnip: selects the answer for the few regs whose result depends on the
+ *         caller (the cases returning "turnip" or "!turnip"); the turnip
+ *         Vulkan driver passes true.
+ * reg:    register offset to check.
+ *
+ * Any reg that has no explicit case below is reported as stompable.
+ *
+ * This is "static inline" because it is defined in a header: a translation
+ * unit that includes the header without calling the function must not get
+ * an unused-function warning.
+ *
+ * Note: "case A ... B" is the GNU case-range extension.
+ */
+static inline bool
+a6xx_fd_reg_rp_stompable(bool turnip, uint16_t reg)
+{
+   switch (reg) {
+   case REG_A6XX_VSC_DRAW_STRM_SIZE_ADDRESS ... REG_A6XX_VSC_DRAW_STRM_SIZE_ADDRESS + 1:
+      return !turnip;
+   case REG_A6XX_VSC_PRIM_STRM_ADDRESS ... REG_A6XX_VSC_DRAW_STRM_LIMIT:
+      return false;
+   case REG_A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL:
+      return false;
+   case REG_A6XX_GRAS_SAMPLE_CONFIG ... REG_A6XX_GRAS_SAMPLE_LOCATION_1:
+      return turnip;
+   case REG_A6XX_GRAS_UNKNOWN_80AF:
+      return false;
+   case REG_A6XX_GRAS_LRZ_DEPTH_VIEW:
+      return turnip;
+   case REG_A6XX_GRAS_UNKNOWN_8110:
+      return false;
+   case REG_A6XX_GRAS_DBG_ECO_CNTL ... REG_A6XX_GRAS_PERFCTR_LRZ_SEL(3):
+      return false;
+   case REG_A6XX_RB_SAMPLE_CONFIG ... REG_A6XX_RB_SAMPLE_LOCATION_1:
+      return turnip;
+   case REG_A6XX_RB_DITHER_CNTL:
+      return !turnip;
+   case REG_A6XX_RB_UNKNOWN_8811 ... REG_A6XX_RB_UNKNOWN_881E:
+      return false;
+   case REG_A6XX_RB_ALPHA_CONTROL:
+      return !turnip;
+   case REG_A6XX_RB_UNKNOWN_88F0:
+      return false;
+   case REG_A6XX_RB_SAMPLE_COUNT_ADDR ... REG_A6XX_RB_SAMPLE_COUNT_ADDR + 1:
+      return false;
+   case REG_A6XX_RB_UNKNOWN_8E01:
+      return false;
+   case REG_A6XX_RB_DBG_ECO_CNTL ... REG_A6XX_RB_CCU_CNTL:
+      return false;
+   case REG_A6XX_RB_PERFCTR_RB_SEL(0) ... REG_A6XX_RB_UNKNOWN_8E51:
+      return false;
+   case REG_A6XX_VPC_UNKNOWN_9210 ... REG_A6XX_VPC_UNKNOWN_9211:
+      return false;
+   case REG_A6XX_VPC_SO(0) ... REG_A6XX_VPC_POINT_COORD_INVERT:
+      return false;
+   case REG_A6XX_VPC_UNKNOWN_9300:
+      return false;
+   case REG_A6XX_VPC_DBG_ECO_CNTL ... REG_A6XX_VPC_PERFCTR_VPC_SEL(5):
+      return false;
+   case REG_A6XX_PC_DRAW_CMD ... REG_A6XX_PC_MARKER:
+      return false;
+   case REG_A6XX_PC_DBG_ECO_CNTL ... REG_A6XX_PC_ADDR_MODE_CNTL:
+      return false;
+   case REG_A6XX_PC_TESSFACTOR_ADDR:
+      return false;
+   case REG_A6XX_VFD_MODE_CNTL:
+      return false;
+   case REG_A6XX_VFD_ADD_OFFSET:
+      return false;
+   case REG_A6XX_SP_UNKNOWN_A9A8:
+      return false;
+   case REG_A6XX_SP_DBG_ECO_CNTL ... REG_A6XX_SP_PERFCTR_SP_SEL(23):
+      return false;
+   case REG_A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR ... REG_A6XX_SP_UNKNOWN_B183:
+      return false;
+   case REG_A6XX_SP_UNKNOWN_B190 ... REG_A6XX_SP_UNKNOWN_B191:
+      return false;
+   case REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR ... REG_A6XX_SP_TP_SAMPLE_LOCATION_1:
+      return false;
+   case REG_A6XX_SP_TP_MODE_CNTL:
+      return false;
+   case REG_A6XX_TPL1_DBG_ECO_CNTL ... REG_A6XX_TPL1_PERFCTR_TP_SEL(11):
+      return false;
+   case REG_A6XX_HLSQ_UNKNOWN_BE00 ... REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL(5):
+      return false;
+
+   /* We should not stomp compute pipeline since they write registers directly
+    * into command stream and should survive renderpass.
+    */
+   case REG_A6XX_SP_CS_CTRL_REG0 ... REG_A6XX_SP_CS_CNTL_1:
+      return false;
+   case REG_A6XX_HLSQ_CS_CNTL ... REG_A6XX_HLSQ_CS_CNTL_1:
+      return false;
+
+   default:
+      break;
+   }
+
+   return true;
+}
+
+/* Stomping some regs is known to cause issues.
+ *
+ * Return true if reg must never be stomped, false otherwise.
+ *
+ * turnip: currently unused; every listed reg is excluded regardless of it.
+ * reg:    register offset to check.
+ *
+ * This is "static inline" because it is defined in a header: a translation
+ * unit that includes the header without calling the function must not get
+ * an unused-function warning.
+ *
+ * Note: "case A ... B" is the GNU case-range extension.
+ */
+static inline bool
+a6xx_fd_reg_do_not_stomp(bool turnip, uint16_t reg)
+{
+   switch (reg) {
+   /* Faults in
+    * dEQP-VK.renderpass.suballocation.formats.r5g6b5_unorm_pack16.clear.clear
+    * It seems that PC_CCU_FLUSH_COLOR_TS reads REG_A6XX_RB_DEPTH_PLANE_CNTL.
+    */
+   case REG_A6XX_RB_DEPTH_PLANE_CNTL:
+      return true;
+   /* Faults in
+    * dEQP-VK.conditional_rendering.draw.condition_host_memory_expect_noop.draw */
+   case REG_A6XX_HLSQ_VS_CNTL ... REG_A6XX_HLSQ_GS_CNTL:
+      return true;
+   case REG_A6XX_HLSQ_FS_CNTL:
+      return true;
+   /* Faults in
+    * dEQP-VK.memory_model.message_passing.ext.u32.coherent.atomic_atomic.atomicrmw.device.payload_local.image.guard_local.image.comp
+    * while there is even no fragment shaders.
+    */
+   case REG_A6XX_SP_FS_OBJ_START ... REG_A6XX_SP_FS_OBJ_START + 1:
+      return true;
+
+   default:
+      break;
+   }
+
+   return false;
+}
+
+#endif /* __FREEDRENO_STOMPABLE_REGS_H__ */
\ No newline at end of file
index 7347472..88d0324 100644 (file)
@@ -390,6 +390,10 @@ r2d_setup_common(struct tu_cmd_buffer *cmd,
                  bool ubwc,
                  bool scissor)
 {
+   if (!cmd->state.pass && cmd->device->dbg_renderpass_stomp_cs) {
+      tu_cs_emit_call(cs, cmd->device->dbg_renderpass_stomp_cs);
+   }
+
    enum a6xx_format fmt = blit_base_format(dst_format, ubwc);
    fixup_dst_format(src_format, &dst_format, &fmt);
    enum a6xx_2d_ifmt ifmt = format_to_ifmt(dst_format);
@@ -1224,6 +1228,10 @@ r3d_setup(struct tu_cmd_buffer *cmd,
           bool ubwc,
           VkSampleCountFlagBits samples)
 {
+   if (!cmd->state.pass && cmd->device->dbg_renderpass_stomp_cs) {
+      tu_cs_emit_call(cs, cmd->device->dbg_renderpass_stomp_cs);
+   }
+
    enum a6xx_format fmt = blit_base_format(dst_format, ubwc);
    fixup_dst_format(src_format, &dst_format, &fmt);
 
index d6a0424..a659faf 100644 (file)
@@ -939,6 +939,10 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 
    tu_cs_emit_wfi(cs);
 
+   if (dev->dbg_cmdbuf_stomp_cs) {
+      tu_cs_emit_call(cs, dev->dbg_cmdbuf_stomp_cs);
+   }
+
    cmd->state.cache.pending_flush_bits &=
       ~(TU_CMD_FLAG_WAIT_FOR_IDLE | TU_CMD_FLAG_CACHE_INVALIDATE);
 
@@ -4321,6 +4325,10 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
       return;
    }
 
+   if (cmd->device->dbg_renderpass_stomp_cs) {
+      tu_cs_emit_call(&cmd->cs, cmd->device->dbg_renderpass_stomp_cs);
+   }
+
    for (unsigned i = 0; i < pass->attachment_count; i++) {
       cmd->state.attachments[i] = pAttachmentInfo ?
          tu_image_view_from_handle(pAttachmentInfo->pAttachments[i]) :
index 7c2f5e3..8c7cced 100644 (file)
@@ -24,6 +24,7 @@
 
 /* for fd_get_driver/device_uuid() */
 #include "freedreno/common/freedreno_uuid.h"
+#include "freedreno/common/freedreno_stompable_regs.h"
 
 #include "tu_clear_blit.h"
 #include "tu_cmd_buffer.h"
@@ -1969,6 +1970,86 @@ tu_u_trace_submission_data_finish(
    vk_free(&device->vk.alloc, submission_data);
 }
 
+enum tu_reg_stomper_flags
+{
+   TU_DEBUG_REG_STOMP_INVERSE = 1 << 0, /* the range lists the regs NOT to stomp */
+   TU_DEBUG_REG_STOMP_CMDBUF = 1 << 1, /* stomp regs at the start of a cmdbuf */
+   TU_DEBUG_REG_STOMP_RENDERPASS = 1 << 2, /* stomp regs before a renderpass */
+};
+
+static const struct debug_named_value tu_reg_stomper_options[] = { /* Flag
+   * names accepted in TU_DEBUG_STALE_REGS_FLAGS; the table is handed to
+   * debug_get_flags_option() in tu_init_dbg_reg_stomper().
+   */
+   { "inverse", TU_DEBUG_REG_STOMP_INVERSE,
+     "By default the range specifies the regs to stomp, with 'inverse' it "
+     "specifies the regs NOT to stomp" },
+   { "cmdbuf", TU_DEBUG_REG_STOMP_CMDBUF,
+     "Stomp regs at the start of a cmdbuf" },
+   { "renderpass", TU_DEBUG_REG_STOMP_RENDERPASS,
+     "Stomp regs before a renderpass" },
+   { NULL, 0 } /* NULL-named entry; presumably the end-of-list marker for the parser */
+};
+
+/* Build the debug command streams that overwrite ("stomp") registers with
+ * 0xffffffff, to help find places that rely on stale register values.
+ *
+ * Controlled by two options:
+ *  - TU_DEBUG_STALE_REGS_RANGE=<first>,<last>: two hex values giving an
+ *    inclusive reg range. If the option is unset nothing is done; if it
+ *    cannot be parsed an error is logged and nothing is done.
+ *  - TU_DEBUG_STALE_REGS_FLAGS: names from tu_reg_stomper_options;
+ *    TU_DEBUG_REG_STOMP_CMDBUF is passed as the default.
+ *
+ * Every reg of a6xx_fd_cmdbuf_stompable_reg_ranges that passes the range
+ * filter and is not rejected by a6xx_fd_reg_do_not_stomp() is written to
+ * the cmdbuf stream (when the cmdbuf flag is set) and, if
+ * a6xx_fd_reg_rp_stompable() also allows it, to the renderpass stream (when
+ * the renderpass flag is set).
+ *
+ * On success both device->dbg_cmdbuf_stomp_cs and
+ * device->dbg_renderpass_stomp_cs are set; tu_DestroyDevice() finishes and
+ * frees them. On any early return neither field is touched.
+ */
+static void
+tu_init_dbg_reg_stomper(struct tu_device *device)
+{
+   const char *stale_reg_range_str =
+      os_get_option("TU_DEBUG_STALE_REGS_RANGE");
+   if (!stale_reg_range_str)
+      return;
+
+   uint32_t first_reg, last_reg;
+
+   if (sscanf(stale_reg_range_str, "%x,%x", &first_reg, &last_reg) != 2) {
+      mesa_loge("Incorrect TU_DEBUG_STALE_REGS_RANGE");
+      return;
+   }
+
+   uint64_t debug_flags = debug_get_flags_option("TU_DEBUG_STALE_REGS_FLAGS",
+                                                 tu_reg_stomper_options,
+                                                 TU_DEBUG_REG_STOMP_CMDBUF);
+
+   /* Allocate both streams up front so that an allocation failure is
+    * handled before either of them has been initialized.
+    */
+   struct tu_cs *cmdbuf_cs = calloc(1, sizeof(*cmdbuf_cs));
+   struct tu_cs *rp_cs = calloc(1, sizeof(*rp_cs));
+   if (!cmdbuf_cs || !rp_cs) {
+      mesa_loge("Failed to allocate reg stomper command streams");
+      free(cmdbuf_cs);
+      free(rp_cs);
+      return;
+   }
+
+   tu_cs_init(cmdbuf_cs, device, TU_CS_MODE_GROW, 4096,
+              "cmdbuf reg stomp cs");
+   tu_cs_begin(cmdbuf_cs);
+
+   tu_cs_init(rp_cs, device, TU_CS_MODE_GROW, 4096, "rp reg stomp cs");
+   tu_cs_begin(rp_cs);
+
+   size_t reg_ranges_count = ARRAY_SIZE(a6xx_fd_cmdbuf_stompable_reg_ranges);
+   for (size_t i = 0; i < reg_ranges_count; i++) {
+      struct fd_stompable_reg_range reg_range =
+         a6xx_fd_cmdbuf_stompable_reg_ranges[i];
+      for (uint16_t reg = reg_range.start_reg; reg <= reg_range.end_reg;
+           reg++) {
+         /* With 'inverse' the user range is an exclusion list, otherwise
+          * it is an inclusion list.
+          */
+         if (debug_flags & TU_DEBUG_REG_STOMP_INVERSE) {
+            if (reg >= first_reg && reg <= last_reg)
+               continue;
+         } else {
+            if (reg < first_reg || reg > last_reg)
+               continue;
+         }
+
+         if (a6xx_fd_reg_do_not_stomp(true, reg))
+            continue;
+
+         if (debug_flags & TU_DEBUG_REG_STOMP_CMDBUF)
+            tu_cs_emit_write_reg(cmdbuf_cs, reg, 0xffffffff);
+
+         if ((debug_flags & TU_DEBUG_REG_STOMP_RENDERPASS) &&
+             a6xx_fd_reg_rp_stompable(true, reg)) {
+            tu_cs_emit_write_reg(rp_cs, reg, 0xffffffff);
+         }
+      }
+   }
+
+   tu_cs_end(cmdbuf_cs);
+   tu_cs_end(rp_cs);
+
+   device->dbg_cmdbuf_stomp_cs = cmdbuf_cs;
+   device->dbg_renderpass_stomp_cs = rp_cs;
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL
 tu_CreateDevice(VkPhysicalDevice physicalDevice,
                 const VkDeviceCreateInfo *pCreateInfo,
@@ -2201,6 +2282,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
       }
    }
 
+   tu_init_dbg_reg_stomper(device);
+
    /* Initialize a condition variable for timeline semaphore */
    pthread_condattr_t condattr;
    if (pthread_condattr_init(&condattr) != 0) {
@@ -2334,6 +2417,16 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
       free(device->perfcntrs_pass_cs);
    }
 
+   if (device->dbg_cmdbuf_stomp_cs) {
+      tu_cs_finish(device->dbg_cmdbuf_stomp_cs);
+      free(device->dbg_cmdbuf_stomp_cs);
+   }
+
+   if (device->dbg_renderpass_stomp_cs) {
+      tu_cs_finish(device->dbg_renderpass_stomp_cs);
+      free(device->dbg_renderpass_stomp_cs);
+   }
+
    tu_autotune_fini(&device->autotune, device);
 
    tu_bo_suballocator_finish(&device->pipeline_suballoc);
index 809b81d..4de58d6 100644 (file)
@@ -307,6 +307,9 @@ struct tu_device
 
    struct breadcrumbs_context *breadcrumbs_ctx;
 
+   struct tu_cs *dbg_cmdbuf_stomp_cs;
+   struct tu_cs *dbg_renderpass_stomp_cs;
+
 #ifdef ANDROID
    const void *gralloc;
    enum {