drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
   3
   4 #include <linux/ascii85.h>
   5 #include "msm_gem.h"
   6 #include "a6xx_gpu.h"
   7 #include "a6xx_gmu.h"
   8 #include "a6xx_gpu_state.h"
   9 #include "a6xx_gmu.xml.h"
  10
/*
 * One captured object in the a6xx GPU state.
 *
 * The capture helpers in this file store the static descriptor the data was
 * read for (debugbus block, cluster, shader block or register list) in
 * @handle and the captured words in @data.  The VBIF debugbus capture has no
 * descriptor and sets @handle to NULL.
 */
struct a6xx_gpu_state_obj {
        const void *handle;
        u32 *data;
};
  15
/*
 * a6xx GPU state snapshot.
 *
 * Most members come in pairs: an array of captured objects plus the number
 * of entries in that array.  The arrays are allocated with state_kcalloc(),
 * which links every allocation onto @objs.
 */
struct a6xx_gpu_state {
        struct msm_gpu_state base;

        /* GMU register lists, filled in by a6xx_get_gmu_registers() */
        struct a6xx_gpu_state_obj *gmu_registers;
        int nr_gmu_registers;

        /* GPU register lists, filled in by a6xx_get_registers() */
        struct a6xx_gpu_state_obj *registers;
        int nr_registers;

        /* Shader / debug blocks, filled in by a6xx_get_shaders() */
        struct a6xx_gpu_state_obj *shaders;
        int nr_shaders;

        /* CP aperture clusters, filled in by a6xx_get_clusters() */
        struct a6xx_gpu_state_obj *clusters;
        int nr_clusters;

        /* AHB aperture clusters, filled in by a6xx_get_dbgahb_clusters() */
        struct a6xx_gpu_state_obj *dbgahb_clusters;
        int nr_dbgahb_clusters;

        /* NOTE(review): populated outside the code visible here */
        struct a6xx_gpu_state_obj *indexed_regs;
        int nr_indexed_regs;

        /* GX debug bus blocks, filled in by a6xx_get_debugbus() */
        struct a6xx_gpu_state_obj *debugbus;
        int nr_debugbus;

        /* Single VBIF debug bus object; only captured on non-GBIF targets */
        struct a6xx_gpu_state_obj *vbif_debugbus;

        /* CX debug bus blocks, filled in by a6xx_get_debugbus() */
        struct a6xx_gpu_state_obj *cx_debugbus;
        int nr_cx_debugbus;

        /* GMU buffer snapshots; presumably produced by a6xx_snapshot_gmu_bo() */
        struct msm_gpu_state_bo *gmu_log;
        struct msm_gpu_state_bo *gmu_hfi;
        struct msm_gpu_state_bo *gmu_debug;

        /* Per-queue HFI history, copied by a6xx_snapshot_gmu_hfi_history() */
        s32 hfi_queue_history[2][HFI_HISTORY_SZ];

        /* List of struct a6xx_state_memobj allocations owned by this state */
        struct list_head objs;

        bool gpu_initialized;
};
  55
  56 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
  57 {
  58         in[0] = val;
  59         in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
  60
  61         return 2;
  62 }
  63
  64 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
  65 {
  66         in[0] = target;
  67         in[1] = (((u64) reg) << 44 | dwords);
  68
  69         return 2;
  70 }
  71
  72 static inline int CRASHDUMP_FINI(u64 *in)
  73 {
  74         in[0] = 0;
  75         in[1] = 0;
  76
  77         return 2;
  78 }
  79
/*
 * Scratch buffer shared with the hardware crashdumper.
 *
 * All three members are filled in by a6xx_crashdumper_init() from a single
 * msm_gem_kernel_new() call: @ptr is the kernel mapping (or an ERR_PTR on
 * failure), @bo the backing GEM object and @iova the GPU address of the
 * buffer.
 */
struct a6xx_crashdumper {
        void *ptr;
        struct drm_gem_object *bo;
        u64 iova;
};
  85
/*
 * Header placed in front of every state_kcalloc() allocation.
 *
 * @node links the allocation onto a6xx_gpu_state::objs; @data is the
 * payload handed back to the caller.
 */
struct a6xx_state_memobj {
        struct list_head node;
        unsigned long long data[];
};
  90
  91 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
  92 {
  93         struct a6xx_state_memobj *obj =
  94                 kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
  95
  96         if (!obj)
  97                 return NULL;
  98
  99         list_add_tail(&obj->node, &a6xx_state->objs);
 100         return &obj->data;
 101 }
 102
 103 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
 104                 size_t size)
 105 {
 106         void *dst = state_kcalloc(a6xx_state, 1, size);
 107
 108         if (dst)
 109                 memcpy(dst, src, size);
 110         return dst;
 111 }
 112
 113 /*
 114  * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 115  * the rest for the data
 116  */
 117 #define A6XX_CD_DATA_OFFSET 8192
 118 #define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
 119
 120 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
 121                 struct a6xx_crashdumper *dumper)
 122 {
 123         dumper->ptr = msm_gem_kernel_new(gpu->dev,
 124                 SZ_1M, MSM_BO_WC, gpu->aspace,
 125                 &dumper->bo, &dumper->iova);
 126
 127         if (!IS_ERR(dumper->ptr))
 128                 msm_gem_object_set_name(dumper->bo, "crashdump");
 129
 130         return PTR_ERR_OR_ZERO(dumper->ptr);
 131 }
 132
 133 static int a6xx_crashdumper_run(struct msm_gpu *gpu,
 134                 struct a6xx_crashdumper *dumper)
 135 {
 136         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 137         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 138         u32 val;
 139         int ret;
 140
 141         if (IS_ERR_OR_NULL(dumper->ptr))
 142                 return -EINVAL;
 143
 144         if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
 145                 return -EINVAL;
 146
 147         /* Make sure all pending memory writes are posted */
 148         wmb();
 149
 150         gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
 151                 REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
 152
 153         gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
 154
 155         ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
 156                 val & 0x02, 100, 10000);
 157
 158         gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
 159
 160         return ret;
 161 }
 162
 163 /* read a value from the GX debug bus */
 164 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
 165                 u32 *data)
 166 {
 167         u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
 168                 A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
 169
 170         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
 171         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
 172         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
 173         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
 174
 175         /* Wait 1 us to make sure the data is flowing */
 176         udelay(1);
 177
 178         data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 179         data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 180
 181         return 2;
 182 }
 183
/*
 * Accessors for the CX debug controller.  @ptr is the ioremapped base and
 * @offset a register index that is shifted left by two to form a byte
 * offset.
 */
#define cxdbg_write(ptr, offset, val) \
        msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
        msm_readl((ptr) + ((offset) << 2))
 189
 190 /* read a value from the CX debug bus */
 191 static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
 192                 u32 *data)
 193 {
 194         u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
 195                 A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
 196
 197         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
 198         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
 199         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
 200         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
 201
 202         /* Wait 1 us to make sure the data is flowing */
 203         udelay(1);
 204
 205         data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 206         data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 207
 208         return 2;
 209 }
 210
 211 /* Read a chunk of data from the VBIF debug bus */
 212 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
 213                 u32 reg, int count, u32 *data)
 214 {
 215         int i;
 216
 217         gpu_write(gpu, ctrl0, reg);
 218
 219         for (i = 0; i < count; i++) {
 220                 gpu_write(gpu, ctrl1, i);
 221                 data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
 222         }
 223
 224         return count;
 225 }
 226
/* Number of blocks of each kind read from the VBIF debug bus */
#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

/*
 * Total number of u32 words in a VBIF debug bus capture: 16 words per AXI
 * arbiter block, 18 per XIN AXI block and 12 per XIN core block, matching
 * the reads done in a6xx_get_vbif_debugbus_block().
 */
#define VBIF_DEBUGBUS_BLOCK_SIZE \
        ((16 * AXI_ARB_BLOCKS) + \
         (18 * XIN_AXI_BLOCKS) + \
         (12 * XIN_CORE_BLOCKS))
 235
 236 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
 237                 struct a6xx_gpu_state *a6xx_state,
 238                 struct a6xx_gpu_state_obj *obj)
 239 {
 240         u32 clk, *ptr;
 241         int i;
 242
 243         obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
 244                 sizeof(u32));
 245         if (!obj->data)
 246                 return;
 247
 248         obj->handle = NULL;
 249
 250         /* Get the current clock setting */
 251         clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
 252
 253         /* Force on the bus so we can read it */
 254         gpu_write(gpu, REG_A6XX_VBIF_CLKON,
 255                 clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
 256
 257         /* We will read from BUS2 first, so disable BUS1 */
 258         gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
 259
 260         /* Enable the VBIF bus for reading */
 261         gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
 262
 263         ptr = obj->data;
 264
 265         for (i = 0; i < AXI_ARB_BLOCKS; i++)
 266                 ptr += vbif_debugbus_read(gpu,
 267                         REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 268                         REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 269                         1 << (i + 16), 16, ptr);
 270
 271         for (i = 0; i < XIN_AXI_BLOCKS; i++)
 272                 ptr += vbif_debugbus_read(gpu,
 273                         REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 274                         REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 275                         1 << i, 18, ptr);
 276
 277         /* Stop BUS2 so we can turn on BUS1 */
 278         gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
 279
 280         for (i = 0; i < XIN_CORE_BLOCKS; i++)
 281                 ptr += vbif_debugbus_read(gpu,
 282                         REG_A6XX_VBIF_TEST_BUS1_CTRL0,
 283                         REG_A6XX_VBIF_TEST_BUS1_CTRL1,
 284                         1 << i, 12, ptr);
 285
 286         /* Restore the VBIF clock setting */
 287         gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
 288 }
 289
 290 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
 291                 struct a6xx_gpu_state *a6xx_state,
 292                 const struct a6xx_debugbus_block *block,
 293                 struct a6xx_gpu_state_obj *obj)
 294 {
 295         int i;
 296         u32 *ptr;
 297
 298         obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 299         if (!obj->data)
 300                 return;
 301
 302         obj->handle = block;
 303
 304         for (ptr = obj->data, i = 0; i < block->count; i++)
 305                 ptr += debugbus_read(gpu, block->id, i, ptr);
 306 }
 307
 308 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
 309                 struct a6xx_gpu_state *a6xx_state,
 310                 const struct a6xx_debugbus_block *block,
 311                 struct a6xx_gpu_state_obj *obj)
 312 {
 313         int i;
 314         u32 *ptr;
 315
 316         obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 317         if (!obj->data)
 318                 return;
 319
 320         obj->handle = block;
 321
 322         for (ptr = obj->data, i = 0; i < block->count; i++)
 323                 ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
 324 }
 325
 326 static void a6xx_get_debugbus(struct msm_gpu *gpu,
 327                 struct a6xx_gpu_state *a6xx_state)
 328 {
 329         struct resource *res;
 330         void __iomem *cxdbg = NULL;
 331         int nr_debugbus_blocks;
 332
 333         /* Set up the GX debug bus */
 334
 335         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
 336                 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 337
 338         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
 339                 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 340
 341         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 342         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 343         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 344         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 345
 346         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
 347         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
 348
 349         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 350         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 351         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 352         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 353
 354         /* Set up the CX debug bus - it lives elsewhere in the system so do a
 355          * temporary ioremap for the registers
 356          */
 357         res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
 358                         "cx_dbgc");
 359
 360         if (res)
 361                 cxdbg = ioremap(res->start, resource_size(res));
 362
 363         if (cxdbg) {
 364                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
 365                         A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 366
 367                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
 368                         A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 369
 370                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 371                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 372                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 373                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 374
 375                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
 376                         0x76543210);
 377                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
 378                         0xFEDCBA98);
 379
 380                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 381                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 382                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 383                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 384         }
 385
 386         nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
 387                 (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
 388
 389         a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
 390                         sizeof(*a6xx_state->debugbus));
 391
 392         if (a6xx_state->debugbus) {
 393                 int i;
 394
 395                 for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
 396                         a6xx_get_debugbus_block(gpu,
 397                                 a6xx_state,
 398                                 &a6xx_debugbus_blocks[i],
 399                                 &a6xx_state->debugbus[i]);
 400
 401                 a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
 402
 403                 /*
 404                  * GBIF has same debugbus as of other GPU blocks, fall back to
 405                  * default path if GPU uses GBIF, also GBIF uses exactly same
 406                  * ID as of VBIF.
 407                  */
 408                 if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
 409                         a6xx_get_debugbus_block(gpu, a6xx_state,
 410                                 &a6xx_gbif_debugbus_block,
 411                                 &a6xx_state->debugbus[i]);
 412
 413                         a6xx_state->nr_debugbus += 1;
 414                 }
 415         }
 416
 417         /*  Dump the VBIF debugbus on applicable targets */
 418         if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
 419                 a6xx_state->vbif_debugbus =
 420                         state_kcalloc(a6xx_state, 1,
 421                                         sizeof(*a6xx_state->vbif_debugbus));
 422
 423                 if (a6xx_state->vbif_debugbus)
 424                         a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
 425                                         a6xx_state->vbif_debugbus);
 426         }
 427
 428         if (cxdbg) {
 429                 a6xx_state->cx_debugbus =
 430                         state_kcalloc(a6xx_state,
 431                         ARRAY_SIZE(a6xx_cx_debugbus_blocks),
 432                         sizeof(*a6xx_state->cx_debugbus));
 433
 434                 if (a6xx_state->cx_debugbus) {
 435                         int i;
 436
 437                         for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
 438                                 a6xx_get_cx_debugbus_block(cxdbg,
 439                                         a6xx_state,
 440                                         &a6xx_cx_debugbus_blocks[i],
 441                                         &a6xx_state->cx_debugbus[i]);
 442
 443                         a6xx_state->nr_cx_debugbus =
 444                                 ARRAY_SIZE(a6xx_cx_debugbus_blocks);
 445                 }
 446
 447                 iounmap(cxdbg);
 448         }
 449 }
 450
/*
 * Register lists are stored as pairs of values; RANGE() gives the number of
 * registers covered by the pair at reg[a] / reg[a + 1], both ends included.
 */
#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
 452
 453 /* Read a data cluster from behind the AHB aperture */
 454 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
 455                 struct a6xx_gpu_state *a6xx_state,
 456                 const struct a6xx_dbgahb_cluster *dbgahb,
 457                 struct a6xx_gpu_state_obj *obj,
 458                 struct a6xx_crashdumper *dumper)
 459 {
 460         u64 *in = dumper->ptr;
 461         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 462         size_t datasize;
 463         int i, regcount = 0;
 464
 465         for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 466                 int j;
 467
 468                 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 469                         (dbgahb->statetype + i * 2) << 8);
 470
 471                 for (j = 0; j < dbgahb->count; j += 2) {
 472                         int count = RANGE(dbgahb->registers, j);
 473                         u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
 474                                 dbgahb->registers[j] - (dbgahb->base >> 2);
 475
 476                         in += CRASHDUMP_READ(in, offset, count, out);
 477
 478                         out += count * sizeof(u32);
 479
 480                         if (i == 0)
 481                                 regcount += count;
 482                 }
 483         }
 484
 485         CRASHDUMP_FINI(in);
 486
 487         datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 488
 489         if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 490                 return;
 491
 492         if (a6xx_crashdumper_run(gpu, dumper))
 493                 return;
 494
 495         obj->handle = dbgahb;
 496         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 497                 datasize);
 498 }
 499
 500 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
 501                 struct a6xx_gpu_state *a6xx_state,
 502                 struct a6xx_crashdumper *dumper)
 503 {
 504         int i;
 505
 506         a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
 507                 ARRAY_SIZE(a6xx_dbgahb_clusters),
 508                 sizeof(*a6xx_state->dbgahb_clusters));
 509
 510         if (!a6xx_state->dbgahb_clusters)
 511                 return;
 512
 513         a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
 514
 515         for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
 516                 a6xx_get_dbgahb_cluster(gpu, a6xx_state,
 517                         &a6xx_dbgahb_clusters[i],
 518                         &a6xx_state->dbgahb_clusters[i], dumper);
 519 }
 520
 521 /* Read a data cluster from the CP aperture with the crashdumper */
 522 static void a6xx_get_cluster(struct msm_gpu *gpu,
 523                 struct a6xx_gpu_state *a6xx_state,
 524                 const struct a6xx_cluster *cluster,
 525                 struct a6xx_gpu_state_obj *obj,
 526                 struct a6xx_crashdumper *dumper)
 527 {
 528         u64 *in = dumper->ptr;
 529         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 530         size_t datasize;
 531         int i, regcount = 0;
 532
 533         /* Some clusters need a selector register to be programmed too */
 534         if (cluster->sel_reg)
 535                 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
 536
 537         for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 538                 int j;
 539
 540                 in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
 541                         (cluster->id << 8) | (i << 4) | i);
 542
 543                 for (j = 0; j < cluster->count; j += 2) {
 544                         int count = RANGE(cluster->registers, j);
 545
 546                         in += CRASHDUMP_READ(in, cluster->registers[j],
 547                                 count, out);
 548
 549                         out += count * sizeof(u32);
 550
 551                         if (i == 0)
 552                                 regcount += count;
 553                 }
 554         }
 555
 556         CRASHDUMP_FINI(in);
 557
 558         datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 559
 560         if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 561                 return;
 562
 563         if (a6xx_crashdumper_run(gpu, dumper))
 564                 return;
 565
 566         obj->handle = cluster;
 567         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 568                 datasize);
 569 }
 570
 571 static void a6xx_get_clusters(struct msm_gpu *gpu,
 572                 struct a6xx_gpu_state *a6xx_state,
 573                 struct a6xx_crashdumper *dumper)
 574 {
 575         int i;
 576
 577         a6xx_state->clusters = state_kcalloc(a6xx_state,
 578                 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
 579
 580         if (!a6xx_state->clusters)
 581                 return;
 582
 583         a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
 584
 585         for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
 586                 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
 587                         &a6xx_state->clusters[i], dumper);
 588 }
 589
 590 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
 591 static void a6xx_get_shader_block(struct msm_gpu *gpu,
 592                 struct a6xx_gpu_state *a6xx_state,
 593                 const struct a6xx_shader_block *block,
 594                 struct a6xx_gpu_state_obj *obj,
 595                 struct a6xx_crashdumper *dumper)
 596 {
 597         u64 *in = dumper->ptr;
 598         size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
 599         int i;
 600
 601         if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 602                 return;
 603
 604         for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
 605                 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 606                         (block->type << 8) | i);
 607
 608                 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
 609                         block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
 610         }
 611
 612         CRASHDUMP_FINI(in);
 613
 614         if (a6xx_crashdumper_run(gpu, dumper))
 615                 return;
 616
 617         obj->handle = block;
 618         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 619                 datasize);
 620 }
 621
 622 static void a6xx_get_shaders(struct msm_gpu *gpu,
 623                 struct a6xx_gpu_state *a6xx_state,
 624                 struct a6xx_crashdumper *dumper)
 625 {
 626         int i;
 627
 628         a6xx_state->shaders = state_kcalloc(a6xx_state,
 629                 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
 630
 631         if (!a6xx_state->shaders)
 632                 return;
 633
 634         a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
 635
 636         for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
 637                 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
 638                         &a6xx_state->shaders[i], dumper);
 639 }
 640
 641 /* Read registers from behind the HLSQ aperture with the crashdumper */
 642 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
 643                 struct a6xx_gpu_state *a6xx_state,
 644                 const struct a6xx_registers *regs,
 645                 struct a6xx_gpu_state_obj *obj,
 646                 struct a6xx_crashdumper *dumper)
 647
 648 {
 649         u64 *in = dumper->ptr;
 650         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 651         int i, regcount = 0;
 652
 653         in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
 654
 655         for (i = 0; i < regs->count; i += 2) {
 656                 u32 count = RANGE(regs->registers, i);
 657                 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
 658                         regs->registers[i] - (regs->val0 >> 2);
 659
 660                 in += CRASHDUMP_READ(in, offset, count, out);
 661
 662                 out += count * sizeof(u32);
 663                 regcount += count;
 664         }
 665
 666         CRASHDUMP_FINI(in);
 667
 668         if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
 669                 return;
 670
 671         if (a6xx_crashdumper_run(gpu, dumper))
 672                 return;
 673
 674         obj->handle = regs;
 675         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 676                 regcount * sizeof(u32));
 677 }
 678
 679 /* Read a block of registers using the crashdumper */
 680 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
 681                 struct a6xx_gpu_state *a6xx_state,
 682                 const struct a6xx_registers *regs,
 683                 struct a6xx_gpu_state_obj *obj,
 684                 struct a6xx_crashdumper *dumper)
 685
 686 {
 687         u64 *in = dumper->ptr;
 688         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 689         int i, regcount = 0;
 690
 691         /* Some blocks might need to program a selector register first */
 692         if (regs->val0)
 693                 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
 694
 695         for (i = 0; i < regs->count; i += 2) {
 696                 u32 count = RANGE(regs->registers, i);
 697
 698                 in += CRASHDUMP_READ(in, regs->registers[i], count, out);
 699
 700                 out += count * sizeof(u32);
 701                 regcount += count;
 702         }
 703
 704         CRASHDUMP_FINI(in);
 705
 706         if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
 707                 return;
 708
 709         if (a6xx_crashdumper_run(gpu, dumper))
 710                 return;
 711
 712         obj->handle = regs;
 713         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 714                 regcount * sizeof(u32));
 715 }
 716
 717 /* Read a block of registers via AHB */
 718 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
 719                 struct a6xx_gpu_state *a6xx_state,
 720                 const struct a6xx_registers *regs,
 721                 struct a6xx_gpu_state_obj *obj)
 722 {
 723         int i, regcount = 0, index = 0;
 724
 725         for (i = 0; i < regs->count; i += 2)
 726                 regcount += RANGE(regs->registers, i);
 727
 728         obj->handle = (const void *) regs;
 729         obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
 730         if (!obj->data)
 731                 return;
 732
 733         for (i = 0; i < regs->count; i += 2) {
 734                 u32 count = RANGE(regs->registers, i);
 735                 int j;
 736
 737                 for (j = 0; j < count; j++)
 738                         obj->data[index++] = gpu_read(gpu,
 739                                 regs->registers[i] + j);
 740         }
 741 }
 742
 743 /* Read a block of GMU registers */
 744 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
 745                 struct a6xx_gpu_state *a6xx_state,
 746                 const struct a6xx_registers *regs,
 747                 struct a6xx_gpu_state_obj *obj,
 748                 bool rscc)
 749 {
 750         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 751         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 752         struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
 753         int i, regcount = 0, index = 0;
 754
 755         for (i = 0; i < regs->count; i += 2)
 756                 regcount += RANGE(regs->registers, i);
 757
 758         obj->handle = (const void *) regs;
 759         obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
 760         if (!obj->data)
 761                 return;
 762
 763         for (i = 0; i < regs->count; i += 2) {
 764                 u32 count = RANGE(regs->registers, i);
 765                 int j;
 766
 767                 for (j = 0; j < count; j++) {
 768                         u32 offset = regs->registers[i] + j;
 769                         u32 val;
 770
 771                         if (rscc)
 772                                 val = gmu_read_rscc(gmu, offset);
 773                         else
 774                                 val = gmu_read(gmu, offset);
 775
 776                         obj->data[index++] = val;
 777                 }
 778         }
 779 }
 780
 781 static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
 782                 struct a6xx_gpu_state *a6xx_state)
 783 {
 784         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 785         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 786
 787         a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
 788                 3, sizeof(*a6xx_state->gmu_registers));
 789
 790         if (!a6xx_state->gmu_registers)
 791                 return;
 792
 793         a6xx_state->nr_gmu_registers = 3;
 794
 795         /* Get the CX GMU registers from AHB */
 796         _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
 797                 &a6xx_state->gmu_registers[0], false);
 798         _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
 799                 &a6xx_state->gmu_registers[1], true);
 800
 801         if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
 802                 return;
 803
 804         /* Set the fence to ALLOW mode so we can access the registers */
 805         gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
 806
 807         _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
 808                 &a6xx_state->gmu_registers[2], false);
 809 }
 810
 811 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
 812                 struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
 813 {
 814         struct msm_gpu_state_bo *snapshot;
 815
 816         if (!bo->size)
 817                 return NULL;
 818
 819         snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
 820         if (!snapshot)
 821                 return NULL;
 822
 823         snapshot->iova = bo->iova;
 824         snapshot->size = bo->size;
 825         snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
 826         if (!snapshot->data)
 827                 return NULL;
 828
 829         memcpy(snapshot->data, bo->virt, bo->size);
 830
 831         return snapshot;
 832 }
 833
 834 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
 835                                           struct a6xx_gpu_state *a6xx_state)
 836 {
 837         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 838         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 839         struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
 840         unsigned i, j;
 841
 842         BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
 843
 844         for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
 845                 struct a6xx_hfi_queue *queue = &gmu->queues[i];
 846                 for (j = 0; j < HFI_HISTORY_SZ; j++) {
 847                         unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
 848                         a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
 849                 }
 850         }
 851 }
 852
/*
 * Number of extra register-list slots a6xx_get_registers() reserves for the
 * single GBIF or VBIF register list it dumps.
 */
#define A6XX_GBIF_REGLIST_SIZE   1
 854 static void a6xx_get_registers(struct msm_gpu *gpu,
 855                 struct a6xx_gpu_state *a6xx_state,
 856                 struct a6xx_crashdumper *dumper)
 857 {
 858         int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
 859                 ARRAY_SIZE(a6xx_reglist) +
 860                 ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
 861         int index = 0;
 862         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 863
 864         a6xx_state->registers = state_kcalloc(a6xx_state,
 865                 count, sizeof(*a6xx_state->registers));
 866
 867         if (!a6xx_state->registers)
 868                 return;
 869
 870         a6xx_state->nr_registers = count;
 871
 872         for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
 873                 a6xx_get_ahb_gpu_registers(gpu,
 874                         a6xx_state, &a6xx_ahb_reglist[i],
 875                         &a6xx_state->registers[index++]);
 876
 877         if (a6xx_has_gbif(adreno_gpu))
 878                 a6xx_get_ahb_gpu_registers(gpu,
 879                                 a6xx_state, &a6xx_gbif_reglist,
 880                                 &a6xx_state->registers[index++]);
 881         else
 882                 a6xx_get_ahb_gpu_registers(gpu,
 883                                 a6xx_state, &a6xx_vbif_reglist,
 884                                 &a6xx_state->registers[index++]);
 885         if (!dumper) {
 886                 /*
 887                  * We can't use the crashdumper when the SMMU is stalled,
 888                  * because the GPU has no memory access until we resume
 889                  * translation (but we don't want to do that until after
 890                  * we have captured as much useful GPU state as possible).
 891                  * So instead collect registers via the CPU:
 892                  */
 893                 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 894                         a6xx_get_ahb_gpu_registers(gpu,
 895                                 a6xx_state, &a6xx_reglist[i],
 896                                 &a6xx_state->registers[index++]);
 897                 return;
 898         }
 899
 900         for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 901                 a6xx_get_crashdumper_registers(gpu,
 902                         a6xx_state, &a6xx_reglist[i],
 903                         &a6xx_state->registers[index++],
 904                         dumper);
 905
 906         for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
 907                 a6xx_get_crashdumper_hlsq_registers(gpu,
 908                         a6xx_state, &a6xx_hlsq_reglist[i],
 909                         &a6xx_state->registers[index++],
 910                         dumper);
 911 }
 912
 913 /* Read a block of data from an indexed register pair */
 914 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
 915                 struct a6xx_gpu_state *a6xx_state,
 916                 const struct a6xx_indexed_registers *indexed,
 917                 struct a6xx_gpu_state_obj *obj)
 918 {
 919         int i;
 920
 921         obj->handle = (const void *) indexed;
 922         obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
 923         if (!obj->data)
 924                 return;
 925
 926         /* All the indexed banks start at address 0 */
 927         gpu_write(gpu, indexed->addr, 0);
 928
 929         /* Read the data - each read increments the internal address by 1 */
 930         for (i = 0; i < indexed->count; i++)
 931                 obj->data[i] = gpu_read(gpu, indexed->data);
 932 }
 933
 934 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
 935                 struct a6xx_gpu_state *a6xx_state)
 936 {
 937         u32 mempool_size;
 938         int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
 939         int i;
 940
 941         a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
 942                 sizeof(*a6xx_state->indexed_regs));
 943         if (!a6xx_state->indexed_regs)
 944                 return;
 945
 946         for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
 947                 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
 948                         &a6xx_state->indexed_regs[i]);
 949
 950         /* Set the CP mempool size to 0 to stabilize it while dumping */
 951         mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
 952         gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
 953
 954         /* Get the contents of the CP mempool */
 955         a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
 956                 &a6xx_state->indexed_regs[i]);
 957
 958         /*
 959          * Offset 0x2000 in the mempool is the size - copy the saved size over
 960          * so the data is consistent
 961          */
 962         a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
 963
 964         /* Restore the size in the hardware */
 965         gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
 966
 967         a6xx_state->nr_indexed_regs = count;
 968 }
 969
 970 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
 971 {
 972         struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
 973         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 974         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 975         struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
 976                 GFP_KERNEL);
 977         bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
 978                         A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
 979
 980         if (!a6xx_state)
 981                 return ERR_PTR(-ENOMEM);
 982
 983         INIT_LIST_HEAD(&a6xx_state->objs);
 984
 985         /* Get the generic state from the adreno core */
 986         adreno_gpu_state_get(gpu, &a6xx_state->base);
 987
 988         a6xx_get_gmu_registers(gpu, a6xx_state);
 989
 990         a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
 991         a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
 992         a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
 993
 994         a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
 995
 996         /* If GX isn't on the rest of the data isn't going to be accessible */
 997         if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
 998                 return &a6xx_state->base;
 999
1000         /* Get the banks of indexed registers */
1001         a6xx_get_indexed_registers(gpu, a6xx_state);
1002
1003         /*
1004          * Try to initialize the crashdumper, if we are not dumping state
1005          * with the SMMU stalled.  The crashdumper needs memory access to
1006          * write out GPU state, so we need to skip this when the SMMU is
1007          * stalled in response to an iova fault
1008          */
1009         if (!stalled && !gpu->needs_hw_init &&
1010             !a6xx_crashdumper_init(gpu, &_dumper)) {
1011                 dumper = &_dumper;
1012         }
1013
1014         a6xx_get_registers(gpu, a6xx_state, dumper);
1015
1016         if (dumper) {
1017                 a6xx_get_shaders(gpu, a6xx_state, dumper);
1018                 a6xx_get_clusters(gpu, a6xx_state, dumper);
1019                 a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1020
1021                 msm_gem_kernel_put(dumper->bo, gpu->aspace);
1022         }
1023
1024         if (snapshot_debugbus)
1025                 a6xx_get_debugbus(gpu, a6xx_state);
1026
1027         a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1028
1029         return  &a6xx_state->base;
1030 }
1031
1032 static void a6xx_gpu_state_destroy(struct kref *kref)
1033 {
1034         struct a6xx_state_memobj *obj, *tmp;
1035         struct msm_gpu_state *state = container_of(kref,
1036                         struct msm_gpu_state, ref);
1037         struct a6xx_gpu_state *a6xx_state = container_of(state,
1038                         struct a6xx_gpu_state, base);
1039
1040         if (a6xx_state->gmu_log)
1041                 kvfree(a6xx_state->gmu_log->data);
1042
1043         if (a6xx_state->gmu_hfi)
1044                 kvfree(a6xx_state->gmu_hfi->data);
1045
1046         if (a6xx_state->gmu_debug)
1047                 kvfree(a6xx_state->gmu_debug->data);
1048
1049         list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1050                 list_del(&obj->node);
1051                 kvfree(obj);
1052         }
1053
1054         adreno_gpu_state_destroy(state);
1055         kfree(a6xx_state);
1056 }
1057
1058 int a6xx_gpu_state_put(struct msm_gpu_state *state)
1059 {
1060         if (IS_ERR_OR_NULL(state))
1061                 return 1;
1062
1063         return kref_put(&state->ref, a6xx_gpu_state_destroy);
1064 }
1065
1066 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1067                 struct drm_printer *p)
1068 {
1069         int i, index = 0;
1070
1071         if (!data)
1072                 return;
1073
1074         for (i = 0; i < count; i += 2) {
1075                 u32 count = RANGE(registers, i);
1076                 u32 offset = registers[i];
1077                 int j;
1078
1079                 for (j = 0; j < count; index++, offset++, j++) {
1080                         if (data[index] == 0xdeafbead)
1081                                 continue;
1082
1083                         drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1084                                 offset << 2, data[index]);
1085                 }
1086         }
1087 }
1088
1089 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1090 {
1091         char out[ASCII85_BUFSZ];
1092         long i, l, datalen = 0;
1093
1094         for (i = 0; i < len >> 2; i++) {
1095                 if (data[i])
1096                         datalen = (i + 1) << 2;
1097         }
1098
1099         if (datalen == 0)
1100                 return;
1101
1102         drm_puts(p, "    data: !!ascii85 |\n");
1103         drm_puts(p, "      ");
1104
1105
1106         l = ascii85_encode_len(datalen);
1107
1108         for (i = 0; i < l; i++)
1109                 drm_puts(p, ascii85_encode(data[i], out));
1110
1111         drm_puts(p, "\n");
1112 }
1113
/* Emit @fmt, then @name, then a newline; @fmt is written out verbatim */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	const char *pieces[] = { fmt, name, "\n" };
	unsigned int k;

	for (k = 0; k < ARRAY_SIZE(pieces); k++)
		drm_puts(p, pieces[k]);
}
1120
1121 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1122                 struct drm_printer *p)
1123 {
1124         const struct a6xx_shader_block *block = obj->handle;
1125         int i;
1126
1127         if (!obj->handle)
1128                 return;
1129
1130         print_name(p, "  - type: ", block->name);
1131
1132         for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1133                 drm_printf(p, "    - bank: %d\n", i);
1134                 drm_printf(p, "      size: %d\n", block->size);
1135
1136                 if (!obj->data)
1137                         continue;
1138
1139                 print_ascii85(p, block->size << 2,
1140                         obj->data + (block->size * i));
1141         }
1142 }
1143
1144 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1145                 struct drm_printer *p)
1146 {
1147         int ctx, index = 0;
1148
1149         for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1150                 int j;
1151
1152                 drm_printf(p, "    - context: %d\n", ctx);
1153
1154                 for (j = 0; j < size; j += 2) {
1155                         u32 count = RANGE(registers, j);
1156                         u32 offset = registers[j];
1157                         int k;
1158
1159                         for (k = 0; k < count; index++, offset++, k++) {
1160                                 if (data[index] == 0xdeafbead)
1161                                         continue;
1162
1163                                 drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1164                                         offset << 2, data[index]);
1165                         }
1166                 }
1167         }
1168 }
1169
1170 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1171                 struct drm_printer *p)
1172 {
1173         const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1174
1175         if (dbgahb) {
1176                 print_name(p, "  - cluster-name: ", dbgahb->name);
1177                 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1178                         obj->data, p);
1179         }
1180 }
1181
1182 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1183                 struct drm_printer *p)
1184 {
1185         const struct a6xx_cluster *cluster = obj->handle;
1186
1187         if (cluster) {
1188                 print_name(p, "  - cluster-name: ", cluster->name);
1189                 a6xx_show_cluster_data(cluster->registers, cluster->count,
1190                         obj->data, p);
1191         }
1192 }
1193
1194 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1195                 struct drm_printer *p)
1196 {
1197         const struct a6xx_indexed_registers *indexed = obj->handle;
1198
1199         if (!indexed)
1200                 return;
1201
1202         print_name(p, "  - regs-name: ", indexed->name);
1203         drm_printf(p, "    dwords: %d\n", indexed->count);
1204
1205         print_ascii85(p, indexed->count << 2, obj->data);
1206 }
1207
1208 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1209                 u32 *data, struct drm_printer *p)
1210 {
1211         if (block) {
1212                 print_name(p, "  - debugbus-block: ", block->name);
1213
1214                 /*
1215                  * count for regular debugbus data is in quadwords,
1216                  * but print the size in dwords for consistency
1217                  */
1218                 drm_printf(p, "    count: %d\n", block->count << 1);
1219
1220                 print_ascii85(p, block->count << 3, data);
1221         }
1222 }
1223
1224 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1225                 struct drm_printer *p)
1226 {
1227         int i;
1228
1229         for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1230                 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1231
1232                 a6xx_show_debugbus_block(obj->handle, obj->data, p);
1233         }
1234
1235         if (a6xx_state->vbif_debugbus) {
1236                 struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1237
1238                 drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1239                 drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1240
1241                 /* vbif debugbus data is in dwords.  Confusing, huh? */
1242                 print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1243         }
1244
1245         for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1246                 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1247
1248                 a6xx_show_debugbus_block(obj->handle, obj->data, p);
1249         }
1250 }
1251
1252 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1253                 struct drm_printer *p)
1254 {
1255         struct a6xx_gpu_state *a6xx_state = container_of(state,
1256                         struct a6xx_gpu_state, base);
1257         int i;
1258
1259         if (IS_ERR_OR_NULL(state))
1260                 return;
1261
1262         drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1263
1264         adreno_show(gpu, state, p);
1265
1266         drm_puts(p, "gmu-log:\n");
1267         if (a6xx_state->gmu_log) {
1268                 struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1269
1270                 drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1271                 drm_printf(p, "    size: %zu\n", gmu_log->size);
1272                 adreno_show_object(p, &gmu_log->data, gmu_log->size,
1273                                 &gmu_log->encoded);
1274         }
1275
1276         drm_puts(p, "gmu-hfi:\n");
1277         if (a6xx_state->gmu_hfi) {
1278                 struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1279                 unsigned i, j;
1280
1281                 drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
1282                 drm_printf(p, "    size: %zu\n", gmu_hfi->size);
1283                 for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1284                         drm_printf(p, "    queue-history[%u]:", i);
1285                         for (j = 0; j < HFI_HISTORY_SZ; j++) {
1286                                 drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1287                         }
1288                         drm_printf(p, "\n");
1289                 }
1290                 adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1291                                 &gmu_hfi->encoded);
1292         }
1293
1294         drm_puts(p, "gmu-debug:\n");
1295         if (a6xx_state->gmu_debug) {
1296                 struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1297
1298                 drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
1299                 drm_printf(p, "    size: %zu\n", gmu_debug->size);
1300                 adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
1301                                 &gmu_debug->encoded);
1302         }
1303
1304         drm_puts(p, "registers:\n");
1305         for (i = 0; i < a6xx_state->nr_registers; i++) {
1306                 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1307                 const struct a6xx_registers *regs = obj->handle;
1308
1309                 if (!obj->handle)
1310                         continue;
1311
1312                 a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1313         }
1314
1315         drm_puts(p, "registers-gmu:\n");
1316         for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1317                 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1318                 const struct a6xx_registers *regs = obj->handle;
1319
1320                 if (!obj->handle)
1321                         continue;
1322
1323                 a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1324         }
1325
1326         drm_puts(p, "indexed-registers:\n");
1327         for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1328                 a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1329
1330         drm_puts(p, "shader-blocks:\n");
1331         for (i = 0; i < a6xx_state->nr_shaders; i++)
1332                 a6xx_show_shader(&a6xx_state->shaders[i], p);
1333
1334         drm_puts(p, "clusters:\n");
1335         for (i = 0; i < a6xx_state->nr_clusters; i++)
1336                 a6xx_show_cluster(&a6xx_state->clusters[i], p);
1337
1338         for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
1339                 a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
1340
1341         drm_puts(p, "debugbus:\n");
1342         a6xx_show_debugbus(a6xx_state, p);
1343 }