// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

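/*
 * One captured dump object: 'handle' points at the static descriptor
 * (register list, debugbus block, cluster, etc.) that was dumped and
 * 'data' holds the values read back from the hardware.
 */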
struct a6xx_gpu_state_obj {
        const void *handle;
        u32 *data;
};

struct a6xx_gpu_state {
        struct msm_gpu_state base;

        struct a6xx_gpu_state_obj *gmu_registers;
        int nr_gmu_registers;

        struct a6xx_gpu_state_obj *registers;
        int nr_registers;

        struct a6xx_gpu_state_obj *shaders;
        int nr_shaders;

        struct a6xx_gpu_state_obj *clusters;
        int nr_clusters;

        struct a6xx_gpu_state_obj *dbgahb_clusters;
        int nr_dbgahb_clusters;

        struct a6xx_gpu_state_obj *indexed_regs;
        int nr_indexed_regs;

        struct a6xx_gpu_state_obj *debugbus;
        int nr_debugbus;

        struct a6xx_gpu_state_obj *vbif_debugbus;

        struct a6xx_gpu_state_obj *cx_debugbus;
        int nr_cx_debugbus;

        struct msm_gpu_state_bo *gmu_log;
        struct msm_gpu_state_bo *gmu_hfi;
        struct msm_gpu_state_bo *gmu_debug;

        s32 hfi_queue_history[2][HFI_HISTORY_SZ];

        struct list_head objs;

        bool gpu_initialized;
};

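/*
 * Helpers for building the crashdumper script.  Each script entry is a
 * pair of 64-bit words: a value (or target iova) followed by a control
 * word with the register offset in the upper bits (reg << 44) and the
 * dword count in the low bits; bit 21 appears to mark the entry as a
 * register write.  Each helper returns the number of u64 slots it
 * consumed so the caller can advance the script pointer.
 */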
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
        in[0] = val;
        in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

        return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
        in[0] = target;
        in[1] = (((u64) reg) << 44 | dwords);

        return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
        in[0] = 0;
        in[1] = 0;

        return 2;
}

struct a6xx_crashdumper {
        void *ptr;
        struct drm_gem_object *bo;
        u64 iova;
};

struct a6xx_state_memobj {
        struct list_head node;
        unsigned long long data[];
};

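/*
 * Bookkeeping allocator for snapshot memory: every allocation is linked
 * into a6xx_state->objs so that it can be freed in one pass by
 * a6xx_gpu_state_destroy().
 */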
static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
        struct a6xx_state_memobj *obj =
                kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

        if (!obj)
                return NULL;

        list_add_tail(&obj->node, &a6xx_state->objs);
        return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
                size_t size)
{
        void *dst = state_kcalloc(a6xx_state, 1, size);

        if (dst)
                memcpy(dst, src, size);
        return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)

static int a6xx_crashdumper_init(struct msm_gpu *gpu,
                struct a6xx_crashdumper *dumper)
{
        dumper->ptr = msm_gem_kernel_new(gpu->dev,
                SZ_1M, MSM_BO_WC, gpu->aspace,
                &dumper->bo, &dumper->iova);

        if (!IS_ERR(dumper->ptr))
                msm_gem_object_set_name(dumper->bo, "crashdump");

        return PTR_ERR_OR_ZERO(dumper->ptr);
}

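/*
 * Run a previously built script: program the script address, kick the
 * dumper and poll for completion (bit 1 of the status register).  Bails
 * out early if SPTPRAC power is not on.
 */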
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
                struct a6xx_crashdumper *dumper)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        u32 val;
        int ret;

        if (IS_ERR_OR_NULL(dumper->ptr))
                return -EINVAL;

        if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
                return -EINVAL;

        /* Make sure all pending memory writes are posted */
        wmb();

        gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
                REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

        gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

        ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
                val & 0x02, 100, 10000);

        gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

        return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
                u32 *data)
{
        u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
                A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

        /* Wait 1 us to make sure the data is flowing */
        udelay(1);

        data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
        data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

        return 2;
}

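/*
 * The CX debug bus registers live outside the GPU's MMIO space; these
 * helpers take dword offsets against the temporarily ioremapped
 * "cx_dbgc" region.
 */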
#define cxdbg_write(ptr, offset, val) \
        msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
        msm_readl((ptr) + ((offset) << 2))

/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
                u32 *data)
{
        u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
                A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

        /* Wait 1 us to make sure the data is flowing */
        udelay(1);

        data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
        data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

        return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
                u32 reg, int count, u32 *data)
{
        int i;

        gpu_write(gpu, ctrl0, reg);

        for (i = 0; i < count; i++) {
                gpu_write(gpu, ctrl1, i);
                data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
        }

        return count;
}

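/*
 * Sub-block counts on the two VBIF test busses; each AXI arbiter block
 * yields 16 dwords, each XIN AXI block 18 and each XIN core block 12,
 * which together give the total dump size below.
 */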
#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
        ((16 * AXI_ARB_BLOCKS) + \
         (18 * XIN_AXI_BLOCKS) + \
         (12 * XIN_CORE_BLOCKS))

static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_gpu_state_obj *obj)
{
        u32 clk, *ptr;
        int i;

        obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
                sizeof(u32));
        if (!obj->data)
                return;

        obj->handle = NULL;

        /* Get the current clock setting */
        clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

        /* Force on the bus so we can read it */
        gpu_write(gpu, REG_A6XX_VBIF_CLKON,
                clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

        /* We will read from BUS2 first, so disable BUS1 */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

        /* Enable the VBIF bus for reading */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

        ptr = obj->data;

        for (i = 0; i < AXI_ARB_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL1,
                        1 << (i + 16), 16, ptr);

        for (i = 0; i < XIN_AXI_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL1,
                        1 << i, 18, ptr);

        /* Stop BUS2 so we can turn on BUS1 */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

        for (i = 0; i < XIN_CORE_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS1_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS1_CTRL1,
                        1 << i, 12, ptr);

        /* Restore the VBIF clock setting */
        gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_debugbus_block *block,
                struct a6xx_gpu_state_obj *obj)
{
        int i;
        u32 *ptr;

        obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
        if (!obj->data)
                return;

        obj->handle = block;

        for (ptr = obj->data, i = 0; i < block->count; i++)
                ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_debugbus_block *block,
                struct a6xx_gpu_state_obj *obj)
{
        int i;
        u32 *ptr;

        obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
        if (!obj->data)
                return;

        obj->handle = block;

        for (ptr = obj->data, i = 0; i < block->count; i++)
                ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}

static void a6xx_get_debugbus(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct resource *res;
        void __iomem *cxdbg = NULL;
        int nr_debugbus_blocks;

        /* Set up the GX debug bus */

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
                A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
                A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

        /* Set up the CX debug bus - it lives elsewhere in the system so do a
         * temporary ioremap for the registers
         */
        res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
                        "cx_dbgc");

        if (res)
                cxdbg = ioremap(res->start, resource_size(res));

        if (cxdbg) {
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
                        A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
                        A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
                        0x76543210);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
                        0xFEDCBA98);

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
        }

        nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
                (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

        a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
                        sizeof(*a6xx_state->debugbus));

        if (a6xx_state->debugbus) {
                int i;

                for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
                        a6xx_get_debugbus_block(gpu,
                                a6xx_state,
                                &a6xx_debugbus_blocks[i],
                                &a6xx_state->debugbus[i]);

                a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

                /*
                 * GBIF has the same debugbus interface as the other GPU
                 * blocks, so read it through the same path.  Note that
                 * GBIF uses exactly the same block ID as VBIF.
                 */
                if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
                        a6xx_get_debugbus_block(gpu, a6xx_state,
                                &a6xx_gbif_debugbus_block,
                                &a6xx_state->debugbus[i]);

                        a6xx_state->nr_debugbus += 1;
                }
        }

        /* Dump the VBIF debugbus on applicable targets */
        if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
                a6xx_state->vbif_debugbus =
                        state_kcalloc(a6xx_state, 1,
                                        sizeof(*a6xx_state->vbif_debugbus));

                if (a6xx_state->vbif_debugbus)
                        a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
                                        a6xx_state->vbif_debugbus);
        }

        if (cxdbg) {
                a6xx_state->cx_debugbus =
                        state_kcalloc(a6xx_state,
                        ARRAY_SIZE(a6xx_cx_debugbus_blocks),
                        sizeof(*a6xx_state->cx_debugbus));

                if (a6xx_state->cx_debugbus) {
                        int i;

                        for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
                                a6xx_get_cx_debugbus_block(cxdbg,
                                        a6xx_state,
                                        &a6xx_cx_debugbus_blocks[i],
                                        &a6xx_state->cx_debugbus[i]);

                        a6xx_state->nr_cx_debugbus =
                                ARRAY_SIZE(a6xx_cx_debugbus_blocks);
                }

                iounmap(cxdbg);
        }
}

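/*
 * Register lists are stored as (start, end) pairs; RANGE() returns the
 * inclusive register count of the pair starting at index 'a'.
 */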
#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)

/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_dbgahb_cluster *dbgahb,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize;
        int i, regcount = 0;

        for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
                int j;

                in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
                        (dbgahb->statetype + i * 2) << 8);

                for (j = 0; j < dbgahb->count; j += 2) {
                        int count = RANGE(dbgahb->registers, j);
                        u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
                                dbgahb->registers[j] - (dbgahb->base >> 2);

                        in += CRASHDUMP_READ(in, offset, count, out);

                        out += count * sizeof(u32);

                        if (i == 0)
                                regcount += count;
                }
        }

        CRASHDUMP_FINI(in);

        datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = dbgahb;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_dbgahb_clusters),
                sizeof(*a6xx_state->dbgahb_clusters));

        if (!a6xx_state->dbgahb_clusters)
                return;

        a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

        for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
                a6xx_get_dbgahb_cluster(gpu, a6xx_state,
                        &a6xx_dbgahb_clusters[i],
                        &a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_cluster *cluster,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize;
        int i, regcount = 0;

        /* Some clusters need a selector register to be programmed too */
        if (cluster->sel_reg)
                in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

        for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
                int j;

                in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
                        (cluster->id << 8) | (i << 4) | i);

                for (j = 0; j < cluster->count; j += 2) {
                        int count = RANGE(cluster->registers, j);

                        in += CRASHDUMP_READ(in, cluster->registers[j],
                                count, out);

                        out += count * sizeof(u32);

                        if (i == 0)
                                regcount += count;
                }
        }

        CRASHDUMP_FINI(in);

        datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = cluster;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->clusters = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

        if (!a6xx_state->clusters)
                return;

        a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

        for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
                a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
                        &a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_shader_block *block,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
        int i;

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
                in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
                        (block->type << 8) | i);

                in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
                        block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
        }

        CRASHDUMP_FINI(in);

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = block;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->shaders = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

        if (!a6xx_state->shaders)
                return;

        a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

        for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
                a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
                        &a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        int i, regcount = 0;

        in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
                        regs->registers[i] - (regs->val0 >> 2);

                in += CRASHDUMP_READ(in, offset, count, out);

                out += count * sizeof(u32);
                regcount += count;
        }

        CRASHDUMP_FINI(in);

        if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        int i, regcount = 0;

        /* Some blocks might need to program a selector register first */
        if (regs->val0)
                in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);

                in += CRASHDUMP_READ(in, regs->registers[i], count, out);

                out += count * sizeof(u32);
                regcount += count;
        }

        CRASHDUMP_FINI(in);

        if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj)
{
        int i, regcount = 0, index = 0;

        for (i = 0; i < regs->count; i += 2)
                regcount += RANGE(regs->registers, i);

        obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
        if (!obj->data)
                return;

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                int j;

                for (j = 0; j < count; j++)
                        obj->data[index++] = gpu_read(gpu,
                                regs->registers[i] + j);
        }
}

/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj,
                bool rscc)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
        int i, regcount = 0, index = 0;

        for (i = 0; i < regs->count; i += 2)
                regcount += RANGE(regs->registers, i);

        obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
        if (!obj->data)
                return;

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                int j;

                for (j = 0; j < count; j++) {
                        u32 offset = regs->registers[i] + j;
                        u32 val;

                        if (rscc)
                                val = gmu_read_rscc(gmu, offset);
                        else
                                val = gmu_read(gmu, offset);

                        obj->data[index++] = val;
                }
        }
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

        a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
                3, sizeof(*a6xx_state->gmu_registers));

        if (!a6xx_state->gmu_registers)
                return;

        a6xx_state->nr_gmu_registers = 3;

        /* Get the CX GMU registers from AHB */
        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
                &a6xx_state->gmu_registers[0], false);
        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
                &a6xx_state->gmu_registers[1], true);

        if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
                return;

        /* Set the fence to ALLOW mode so we can access the registers */
        gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
                &a6xx_state->gmu_registers[2], false);
}

static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
                struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
{
        struct msm_gpu_state_bo *snapshot;

        if (!bo->size)
                return NULL;

        snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
        if (!snapshot)
                return NULL;

        snapshot->iova = bo->iova;
        snapshot->size = bo->size;
        snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
        if (!snapshot->data)
                return NULL;

        memcpy(snapshot->data, bo->virt, bo->size);

        return snapshot;
}

static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
                                          struct a6xx_gpu_state *a6xx_state)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
        unsigned i, j;

        BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));

        for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
                struct a6xx_hfi_queue *queue = &gmu->queues[i];
                for (j = 0; j < HFI_HISTORY_SZ; j++) {
                        unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
                        a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
                }
        }
}

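/* Only one of the GBIF/VBIF register lists is dumped, so reserve one slot */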
#define A6XX_GBIF_REGLIST_SIZE   1
static void a6xx_get_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
                ARRAY_SIZE(a6xx_reglist) +
                ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
        int index = 0;
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

        a6xx_state->registers = state_kcalloc(a6xx_state,
                count, sizeof(*a6xx_state->registers));

        if (!a6xx_state->registers)
                return;

        a6xx_state->nr_registers = count;

        for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
                a6xx_get_ahb_gpu_registers(gpu,
                        a6xx_state, &a6xx_ahb_reglist[i],
                        &a6xx_state->registers[index++]);

        if (a6xx_has_gbif(adreno_gpu))
                a6xx_get_ahb_gpu_registers(gpu,
                                a6xx_state, &a6xx_gbif_reglist,
                                &a6xx_state->registers[index++]);
        else
                a6xx_get_ahb_gpu_registers(gpu,
                                a6xx_state, &a6xx_vbif_reglist,
                                &a6xx_state->registers[index++]);
        if (!dumper) {
                /*
                 * We can't use the crashdumper when the SMMU is stalled,
                 * because the GPU has no memory access until we resume
                 * translation (but we don't want to do that until after
                 * we have captured as much useful GPU state as possible).
                 * So instead collect registers via the CPU:
                 */
                for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
                        a6xx_get_ahb_gpu_registers(gpu,
                                a6xx_state, &a6xx_reglist[i],
                                &a6xx_state->registers[index++]);
                return;
        }

        for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
                a6xx_get_crashdumper_registers(gpu,
                        a6xx_state, &a6xx_reglist[i],
                        &a6xx_state->registers[index++],
                        dumper);

        for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
                a6xx_get_crashdumper_hlsq_registers(gpu,
                        a6xx_state, &a6xx_hlsq_reglist[i],
                        &a6xx_state->registers[index++],
                        dumper);
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_indexed_registers *indexed,
                struct a6xx_gpu_state_obj *obj)
{
        int i;

        obj->handle = (const void *) indexed;
        obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
        if (!obj->data)
                return;

        /* All the indexed banks start at address 0 */
        gpu_write(gpu, indexed->addr, 0);

        /* Read the data - each read increments the internal address by 1 */
        for (i = 0; i < indexed->count; i++)
                obj->data[i] = gpu_read(gpu, indexed->data);
}

static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        u32 mempool_size;
        int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
        int i;

        a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
                sizeof(*a6xx_state->indexed_regs));
        if (!a6xx_state->indexed_regs)
                return;

        for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
                a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
                        &a6xx_state->indexed_regs[i]);

        /* Set the CP mempool size to 0 to stabilize it while dumping */
        mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
        gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

        /* Get the contents of the CP mempool */
        a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
                &a6xx_state->indexed_regs[i]);

        /*
         * Offset 0x2000 in the mempool is the size - copy the saved size over
         * so the data is consistent
         */
        a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

        /* Restore the size in the hardware */
        gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

        a6xx_state->nr_indexed_regs = count;
}

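/*
 * Top-level state capture: grab the GMU side first, then (only if GX is
 * powered) the indexed registers, and finally the register, shader and
 * cluster dumps, using the crashdumper when it is safe to run it.
 */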
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
        struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
                GFP_KERNEL);
        bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
                        A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

        if (!a6xx_state)
                return ERR_PTR(-ENOMEM);

        INIT_LIST_HEAD(&a6xx_state->objs);

        /* Get the generic state from the adreno core */
        adreno_gpu_state_get(gpu, &a6xx_state->base);

        a6xx_get_gmu_registers(gpu, a6xx_state);

        a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
        a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
        a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

        a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);

        /* If GX isn't on the rest of the data isn't going to be accessible */
        if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
                return &a6xx_state->base;

        /* Get the banks of indexed registers */
        a6xx_get_indexed_registers(gpu, a6xx_state);

        /*
         * Try to initialize the crashdumper, if we are not dumping state
         * with the SMMU stalled.  The crashdumper needs memory access to
         * write out GPU state, so we need to skip this when the SMMU is
         * stalled in response to an iova fault
         */
        if (!stalled && !gpu->needs_hw_init &&
            !a6xx_crashdumper_init(gpu, &_dumper)) {
                dumper = &_dumper;
        }

        a6xx_get_registers(gpu, a6xx_state, dumper);

        if (dumper) {
                a6xx_get_shaders(gpu, a6xx_state, dumper);
                a6xx_get_clusters(gpu, a6xx_state, dumper);
                a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

                msm_gem_kernel_put(dumper->bo, gpu->aspace);
        }

        if (snapshot_debugbus)
                a6xx_get_debugbus(gpu, a6xx_state);

        a6xx_state->gpu_initialized = !gpu->needs_hw_init;

        return &a6xx_state->base;
}

static void a6xx_gpu_state_destroy(struct kref *kref)
{
        struct a6xx_state_memobj *obj, *tmp;
        struct msm_gpu_state *state = container_of(kref,
                        struct msm_gpu_state, ref);
        struct a6xx_gpu_state *a6xx_state = container_of(state,
                        struct a6xx_gpu_state, base);

        if (a6xx_state->gmu_log)
                kvfree(a6xx_state->gmu_log->data);

        if (a6xx_state->gmu_hfi)
                kvfree(a6xx_state->gmu_hfi->data);

        if (a6xx_state->gmu_debug)
                kvfree(a6xx_state->gmu_debug->data);

        list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
                list_del(&obj->node);
                kvfree(obj);
        }

        adreno_gpu_state_destroy(state);
        kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
        if (IS_ERR_OR_NULL(state))
                return 1;

        return kref_put(&state->ref, a6xx_gpu_state_destroy);
}

static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
                struct drm_printer *p)
{
        int i, index = 0;

        if (!data)
                return;

        for (i = 0; i < count; i += 2) {
                u32 count = RANGE(registers, i);
                u32 offset = registers[i];
                int j;

                for (j = 0; j < count; index++, offset++, j++) {
                        if (data[index] == 0xdeafbead)
                                continue;

                        drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
                                offset << 2, data[index]);
                }
        }
}

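/*
 * Emit a buffer as a YAML "!!ascii85" scalar, trimming trailing zero
 * dwords to keep the dump small.
 */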
static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
        char out[ASCII85_BUFSZ];
        long i, l, datalen = 0;

        for (i = 0; i < len >> 2; i++) {
                if (data[i])
                        datalen = (i + 1) << 2;
        }

        if (datalen == 0)
                return;

        drm_puts(p, "    data: !!ascii85 |\n");
        drm_puts(p, "      ");

        l = ascii85_encode_len(datalen);

        for (i = 0; i < l; i++)
                drm_puts(p, ascii85_encode(data[i], out));

        drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
        drm_puts(p, fmt);
        drm_puts(p, name);
        drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_shader_block *block = obj->handle;
        int i;

        if (!obj->handle)
                return;

        print_name(p, "  - type: ", block->name);

        for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
                drm_printf(p, "    - bank: %d\n", i);
                drm_printf(p, "      size: %d\n", block->size);

                if (!obj->data)
                        continue;

                print_ascii85(p, block->size << 2,
                        obj->data + (block->size * i));
        }
}

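/*
 * Print the per-context cluster registers.  Entries of 0xdeafbead
 * (apparently a poison value for registers the crashdumper could not
 * read) are skipped.
 */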
static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
                struct drm_printer *p)
{
        int ctx, index = 0;

        for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
                int j;

                drm_printf(p, "    - context: %d\n", ctx);

                for (j = 0; j < size; j += 2) {
                        u32 count = RANGE(registers, j);
                        u32 offset = registers[j];
                        int k;

                        for (k = 0; k < count; index++, offset++, k++) {
                                if (data[index] == 0xdeafbead)
                                        continue;

                                drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
                                        offset << 2, data[index]);
                        }
                }
        }
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

        if (dbgahb) {
                print_name(p, "  - cluster-name: ", dbgahb->name);
                a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
                        obj->data, p);
        }
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_cluster *cluster = obj->handle;

        if (cluster) {
                print_name(p, "  - cluster-name: ", cluster->name);
                a6xx_show_cluster_data(cluster->registers, cluster->count,
                        obj->data, p);
        }
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_indexed_registers *indexed = obj->handle;

        if (!indexed)
                return;

        print_name(p, "  - regs-name: ", indexed->name);
        drm_printf(p, "    dwords: %d\n", indexed->count);

        print_ascii85(p, indexed->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
                u32 *data, struct drm_printer *p)
{
        if (block) {
                print_name(p, "  - debugbus-block: ", block->name);

                /*
                 * count for regular debugbus data is in quadwords,
                 * but print the size in dwords for consistency
                 */
                drm_printf(p, "    count: %d\n", block->count << 1);

                print_ascii85(p, block->count << 3, data);
        }
}

static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
                struct drm_printer *p)
{
        int i;

        for (i = 0; i < a6xx_state->nr_debugbus; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

                a6xx_show_debugbus_block(obj->handle, obj->data, p);
        }

        if (a6xx_state->vbif_debugbus) {
                struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

                drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
                drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

                /* vbif debugbus data is in dwords.  Confusing, huh? */
                print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
        }

        for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

                a6xx_show_debugbus_block(obj->handle, obj->data, p);
        }
}

void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
                struct drm_printer *p)
{
        struct a6xx_gpu_state *a6xx_state = container_of(state,
                        struct a6xx_gpu_state, base);
        int i;

        if (IS_ERR_OR_NULL(state))
                return;

        drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

        adreno_show(gpu, state, p);

        drm_puts(p, "gmu-log:\n");
        if (a6xx_state->gmu_log) {
                struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

                drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
                drm_printf(p, "    size: %zu\n", gmu_log->size);
                adreno_show_object(p, &gmu_log->data, gmu_log->size,
                                &gmu_log->encoded);
        }

        drm_puts(p, "gmu-hfi:\n");
        if (a6xx_state->gmu_hfi) {
                struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
                unsigned i, j;

                drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
                drm_printf(p, "    size: %zu\n", gmu_hfi->size);
                for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
                        drm_printf(p, "    queue-history[%u]:", i);
                        for (j = 0; j < HFI_HISTORY_SZ; j++) {
                                drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
                        }
                        drm_printf(p, "\n");
                }
                adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
                                &gmu_hfi->encoded);
        }

        drm_puts(p, "gmu-debug:\n");
        if (a6xx_state->gmu_debug) {
                struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

                drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
                drm_printf(p, "    size: %zu\n", gmu_debug->size);
                adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
                                &gmu_debug->encoded);
        }

        drm_puts(p, "registers:\n");
        for (i = 0; i < a6xx_state->nr_registers; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
                const struct a6xx_registers *regs = obj->handle;

                if (!obj->handle)
                        continue;

                a6xx_show_registers(regs->registers, obj->data, regs->count, p);
        }

        drm_puts(p, "registers-gmu:\n");
        for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
                const struct a6xx_registers *regs = obj->handle;

                if (!obj->handle)
                        continue;

                a6xx_show_registers(regs->registers, obj->data, regs->count, p);
        }

        drm_puts(p, "indexed-registers:\n");
        for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
                a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

        drm_puts(p, "shader-blocks:\n");
        for (i = 0; i < a6xx_state->nr_shaders; i++)
                a6xx_show_shader(&a6xx_state->shaders[i], p);

        drm_puts(p, "clusters:\n");
        for (i = 0; i < a6xx_state->nr_clusters; i++)
                a6xx_show_cluster(&a6xx_state->clusters[i], p);

        for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
                a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);

        drm_puts(p, "debugbus:\n");
        a6xx_show_debugbus(a6xx_state, p);
}