drivers/gpu/drm/msm/adreno/a5xx_gpu.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18
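/*
 * Peripheral Authentication Service (PAS) ID that identifies the GPU to the
 * secure world when loading or resuming the zap shader via SCM calls.
 */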
19 #define GPU_PAS_ID 13
20
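/*
 * Publish the ring's new write pointer to the hardware - but only if this is
 * the currently active ring and no preemption is in flight (the preemption
 * code updates WPTR itself once the switch completes).
 */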
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25         uint32_t wptr;
26         unsigned long flags;
27
28         spin_lock_irqsave(&ring->lock, flags);
29
30         /* Copy the shadow to the actual register */
31         ring->cur = ring->next;
32
33         /* Make sure to wrap wptr if we need to */
34         wptr = get_wptr(ring);
35
36         spin_unlock_irqrestore(&ring->lock, flags);
37
38         /* Make sure everything is posted before making a decision */
39         mb();
40
41         /* Update HW if this is the current ring and we are not in preempt */
42         if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43                 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
44 }
45
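/*
 * Debug submit path used with CONFIG_DRM_MSM_GPU_SUDO: copy the command
 * stream directly into the ringbuffer instead of issuing indirect buffers,
 * then idle the GPU and retire the submit by hand.
 */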
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47         struct msm_file_private *ctx)
48 {
49         struct msm_drm_private *priv = gpu->dev->dev_private;
50         struct msm_ringbuffer *ring = submit->ring;
51         struct msm_gem_object *obj;
52         uint32_t *ptr, dwords;
53         unsigned int i, j;
54
55         for (i = 0; i < submit->nr_cmds; i++) {
56                 switch (submit->cmd[i].type) {
57                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
58                         break;
59                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60                         if (priv->lastctx == ctx)
61                                 break;
62                         fallthrough;
63                 case MSM_SUBMIT_CMD_BUF:
64                         /* copy commands into RB: */
65                         obj = submit->bos[submit->cmd[i].idx].obj;
66                         dwords = submit->cmd[i].size;
67
68                         ptr = msm_gem_get_vaddr(&obj->base);
69
70                         /* _get_vaddr() shouldn't fail at this point,
71                          * since we've already mapped it once in
72                          * submit_reloc()
73                          */
74                         if (WARN_ON(!ptr))
75                                 return;
76
77                         for (j = 0; j < dwords; j++) {
78                                 /* normally the OUT_PKTn() would wait
79                                  * for space for the packet.  But since
80                                  * we just OUT_RING() the whole thing,
81                                  * need to call adreno_wait_ring()
82                                  * ourselves:
83                                  */
84                                 adreno_wait_ring(ring, 1);
85                                 OUT_RING(ring, ptr[j]);
86                         }
87
88                         msm_gem_put_vaddr(&obj->base);
89
90                         break;
91                 }
92         }
93
94         a5xx_flush(gpu, ring);
95         a5xx_preempt_trigger(gpu);
96
97         /* we might not necessarily have a cmd from userspace to
98          * trigger an event to know that submit has completed, so
99          * do this manually:
100          */
101         a5xx_idle(gpu, ring);
102         ring->memptrs->fence = submit->seqno;
103         msm_gpu_retire(gpu);
104 }
105
106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
107         struct msm_file_private *ctx)
108 {
109         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
110         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
111         struct msm_drm_private *priv = gpu->dev->dev_private;
112         struct msm_ringbuffer *ring = submit->ring;
113         unsigned int i, ibs = 0;
114
115         if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
116                 priv->lastctx = NULL;
117                 a5xx_submit_in_rb(gpu, submit, ctx);
118                 return;
119         }
120
121         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
122         OUT_RING(ring, 0x02);
123
124         /* Turn off protected mode to write to special registers */
125         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
126         OUT_RING(ring, 0);
127
128         /* Set the save preemption record for the ring/command */
129         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
130         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
132
133         /* Turn back on protected mode */
134         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135         OUT_RING(ring, 1);
136
137         /* Enable local preemption for fine-grained preemption */
138         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
139         OUT_RING(ring, 0x02);
140
141         /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
142         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
143         OUT_RING(ring, 0x02);
144
145         /* Submit the commands */
146         for (i = 0; i < submit->nr_cmds; i++) {
147                 switch (submit->cmd[i].type) {
148                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
149                         break;
150                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
151                         if (priv->lastctx == ctx)
152                                 break;
153                         fallthrough;
154                 case MSM_SUBMIT_CMD_BUF:
155                         OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
156                         OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
157                         OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
158                         OUT_RING(ring, submit->cmd[i].size);
159                         ibs++;
160                         break;
161                 }
162         }
163
164         /*
165          * Write the render mode to NULL (0) to indicate to the CP that the IBs
166          * are done rendering - otherwise a lucky preemption would start
167          * replaying from the last checkpoint
168          */
169         OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
170         OUT_RING(ring, 0);
171         OUT_RING(ring, 0);
172         OUT_RING(ring, 0);
173         OUT_RING(ring, 0);
174         OUT_RING(ring, 0);
175
176         /* Turn off IB level preemptions */
177         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
178         OUT_RING(ring, 0x01);
179
180         /* Write the fence to the scratch register */
181         OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
182         OUT_RING(ring, submit->seqno);
183
184         /*
185          * Execute a CACHE_FLUSH_TS event. This will ensure that the
186          * timestamp is written to the memory and then triggers the interrupt
187          */
188         OUT_PKT7(ring, CP_EVENT_WRITE, 4);
189         OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
190                 CP_EVENT_WRITE_0_IRQ);
191         OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
192         OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
193         OUT_RING(ring, submit->seqno);
194
195         /* Yield the floor on command completion */
196         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
197         /*
198          * If dword[2:1] are non zero, they specify an address for the CP to
199          * write the value of dword[3] to on preemption complete. Write 0 to
200          * skip the write
201          */
202         OUT_RING(ring, 0x00);
203         OUT_RING(ring, 0x00);
204         /* Data value - not used if the address above is 0 */
205         OUT_RING(ring, 0x01);
206         /* Set bit 0 to trigger an interrupt on preempt complete */
207         OUT_RING(ring, 0x01);
208
209         a5xx_flush(gpu, ring);
210
211         /* Check to see if we need to start preemption */
212         a5xx_preempt_trigger(gpu);
213 }
214
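/* Per-block hardware clock gating (HWCG) settings applied by a5xx_set_hwcg() */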
215 static const struct {
216         u32 offset;
217         u32 value;
218 } a5xx_hwcg[] = {
219         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
220         {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
221         {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
222         {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
223         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
224         {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
225         {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
226         {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
227         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
228         {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
229         {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
230         {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
231         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
232         {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
233         {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
234         {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
235         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
236         {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
237         {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
238         {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
239         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
240         {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
241         {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
242         {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
243         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
244         {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
245         {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
246         {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
247         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
248         {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
249         {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
250         {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
251         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
252         {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
253         {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
254         {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
255         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
256         {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
257         {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
258         {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
259         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
260         {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
261         {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
262         {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
263         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
264         {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
265         {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
266         {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
267         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
268         {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
269         {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
270         {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
271         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
272         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
273         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
274         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
275         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
276         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
277         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
278         {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
279         {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
280         {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
281         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
282         {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
283         {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
284         {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
285         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
286         {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
287         {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
288         {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
289         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
290         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
291         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
292         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
293         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
294         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
295         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
296         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
297         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
298         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
299         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
300         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
301         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
302         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
303         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
304         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
305         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
306         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
307         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
308         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
309         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
310         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
311 };
312
313 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
314 {
315         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
316         unsigned int i;
317
318         for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
319                 gpu_write(gpu, a5xx_hwcg[i].offset,
320                         state ? a5xx_hwcg[i].value : 0);
321
322         if (adreno_is_a540(adreno_gpu)) {
323                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
324                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
325         }
326
327         gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
328         gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
329 }
330
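/* Issue CP_ME_INIT to bring up the CP microengine and wait for the GPU to idle */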
331 static int a5xx_me_init(struct msm_gpu *gpu)
332 {
333         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
334         struct msm_ringbuffer *ring = gpu->rb[0];
335
336         OUT_PKT7(ring, CP_ME_INIT, 8);
337
338         OUT_RING(ring, 0x0000002F);
339
340         /* Enable multiple hardware contexts */
341         OUT_RING(ring, 0x00000003);
342
343         /* Enable error detection */
344         OUT_RING(ring, 0x20000000);
345
346         /* Don't enable header dump */
347         OUT_RING(ring, 0x00000000);
348         OUT_RING(ring, 0x00000000);
349
350         /* Specify workarounds for various microcode issues */
351         if (adreno_is_a530(adreno_gpu)) {
352                 /* Workaround for token end syncs
353                  * Force a WFI after every direct-render 3D mode draw and every
354                  * 2D mode 3 draw
355                  */
356                 OUT_RING(ring, 0x0000000B);
357         } else if (adreno_is_a510(adreno_gpu)) {
358                 /* Workaround for token and syncs */
359                 OUT_RING(ring, 0x00000001);
360         } else {
361                 /* No workarounds enabled */
362                 OUT_RING(ring, 0x00000000);
363         }
364
365         OUT_RING(ring, 0x00000000);
366         OUT_RING(ring, 0x00000000);
367
368         gpu->funcs->flush(gpu, ring);
369         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
370 }
371
372 static int a5xx_preempt_start(struct msm_gpu *gpu)
373 {
374         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
375         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
376         struct msm_ringbuffer *ring = gpu->rb[0];
377
378         if (gpu->nr_rings == 1)
379                 return 0;
380
381         /* Turn off protected mode to write to special registers */
382         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
383         OUT_RING(ring, 0);
384
385         /* Set the save preemption record for the ring/command */
386         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
387         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
388         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
389
390         /* Turn back on protected mode */
391         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
392         OUT_RING(ring, 1);
393
394         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
395         OUT_RING(ring, 0x00);
396
397         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
398         OUT_RING(ring, 0x01);
399
400         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
401         OUT_RING(ring, 0x01);
402
403         /* Yield the floor on command completion */
404         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
405         OUT_RING(ring, 0x00);
406         OUT_RING(ring, 0x00);
407         OUT_RING(ring, 0x01);
408         OUT_RING(ring, 0x01);
409
410         gpu->funcs->flush(gpu, ring);
411
412         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
413 }
414
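/*
 * Create GEM buffers for the PM4 and PFP microcode (on first use) and point
 * the CP instruction base registers at them.
 */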
415 static int a5xx_ucode_init(struct msm_gpu *gpu)
416 {
417         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
418         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
419         int ret;
420
421         if (!a5xx_gpu->pm4_bo) {
422                 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
423                         adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
424
425
426                 if (IS_ERR(a5xx_gpu->pm4_bo)) {
427                         ret = PTR_ERR(a5xx_gpu->pm4_bo);
428                         a5xx_gpu->pm4_bo = NULL;
429                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
430                                 ret);
431                         return ret;
432                 }
433
434                 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
435         }
436
437         if (!a5xx_gpu->pfp_bo) {
438                 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
439                         adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
440
441                 if (IS_ERR(a5xx_gpu->pfp_bo)) {
442                         ret = PTR_ERR(a5xx_gpu->pfp_bo);
443                         a5xx_gpu->pfp_bo = NULL;
444                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
445                                 ret);
446                         return ret;
447                 }
448
449                 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
450         }
451
452         gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
453                 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
454
455         gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
456                 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
457
458         return 0;
459 }
460
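/* Remote state value asking the secure world to re-initialize an already loaded zap shader */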
461 #define SCM_GPU_ZAP_SHADER_RESUME 0
462
463 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
464 {
465         int ret;
466
467         ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
468         if (ret)
469                 DRM_ERROR("%s: zap-shader resume failed: %d\n",
470                         gpu->name, ret);
471
472         return ret;
473 }
474
475 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
476 {
477         static bool loaded;
478         int ret;
479
480         /*
481          * If the zap shader is already loaded into memory we just need to kick
482          * the remote processor to reinitialize it
483          */
484         if (loaded)
485                 return a5xx_zap_shader_resume(gpu);
486
487         ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
488
489         loaded = !ret;
490         return ret;
491 }
492
493 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
494           A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
495           A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
496           A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
497           A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
498           A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
499           A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
500           A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
501           A5XX_RBBM_INT_0_MASK_CP_SW | \
502           A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
503           A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
504           A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
505
506 static int a5xx_hw_init(struct msm_gpu *gpu)
507 {
508         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
509         int ret;
510
511         gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
512
513         if (adreno_is_a540(adreno_gpu))
514                 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
515
516         /* Make all blocks contribute to the GPU BUSY perf counter */
517         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
518
519         /* Enable RBBM error reporting bits */
520         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
521
522         if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
523                 /*
524                  * Mask out the activity signals from RB1-3 to avoid false
525                  * positives
526                  */
527
528                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
529                         0xF0000000);
530                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
531                         0xFFFFFFFF);
532                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
533                         0xFFFFFFFF);
534                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
535                         0xFFFFFFFF);
536                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
537                         0xFFFFFFFF);
538                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
539                         0xFFFFFFFF);
540                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
541                         0xFFFFFFFF);
542                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
543                         0xFFFFFFFF);
544         }
545
546         /* Enable fault detection */
547         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
548                 (1 << 30) | 0xFFFF);
549
550         /* Turn on performance counters */
551         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
552
553         /* Select CP0 to always count cycles */
554         gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
555
556         /* Select countable 6 on RBBM perf counter 0 to get the GPU busy status for devfreq */
557         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
558
559         /* Increase VFD cache access so LRZ and other data gets evicted less */
560         gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
561
562         /* Disable L2 bypass in the UCHE */
563         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
564         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
565         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
566         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
567
568         /* Set the GMEM VA range (0 to gpu->gmem) */
569         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
570         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
571         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
572                 0x00100000 + adreno_gpu->gmem - 1);
573         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
574
575         if (adreno_is_a510(adreno_gpu)) {
576                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
577                 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
578                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
579                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
580                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
581                           (0x200 << 11 | 0x200 << 22));
582         } else {
583                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
584                 if (adreno_is_a530(adreno_gpu))
585                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
586                 if (adreno_is_a540(adreno_gpu))
587                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
588                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
589                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
590                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
591                           (0x400 << 11 | 0x300 << 22));
592         }
593
594         if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
595                 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
596
597         gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
598
599         /* Enable USE_RETENTION_FLOPS */
600         gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
601
602         /* Enable ME/PFP split notification */
603         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
604
605         /*
606          *  On A5xx, the CCU can send the context_done event of a particular
607          *  context to the UCHE (and ultimately the CP) while a valid
608          *  transaction for that context is still inside the CCU. The CP can
609          *  then reprogram config registers, causing that pending transaction
610          *  to be interpreted differently and the GPU to fault. This bug is
611          *  fixed in the latest A510 revision; to enable the fix, bit[11] of
612          *  RB_DBG_ECO_CNTL needs to be set to 0 (the default is 1, i.e.
613          *  disabled). On older A510 revisions this bit is unused.
614          */
615         if (adreno_is_a510(adreno_gpu))
616                 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
617
618         /* Enable HWCG */
619         a5xx_set_hwcg(gpu, true);
620
621         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
622
623         /* Set the highest bank bit */
624         gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
625         gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
626         if (adreno_is_a540(adreno_gpu))
627                 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
628
629         /* Protect registers from the CP */
630         gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
631
632         /* RBBM */
633         gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
634         gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
635         gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
636         gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
637         gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
638         gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
639
640         /* Content protect */
641         gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
642                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
643                         16));
644         gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
645                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
646
647         /* CP */
648         gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
649         gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
650         gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
651         gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
652
653         /* RB */
654         gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
655         gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
656
657         /* VPC */
658         gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
659         gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
660
661         /* UCHE */
662         gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
663
664         if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
665                 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
666                         ADRENO_PROTECT_RW(0x10000, 0x8000));
667
668         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
669         /*
670          * Disable the trusted memory range - we don't actually support secure
671          * memory rendering at this point in time and we don't want to block off
672          * part of the virtual memory space.
673          */
674         gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
675                 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
676         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
677
678         /* Put the GPU into 64 bit by default */
679         gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
680         gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
681         gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
682         gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
683         gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
684         gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
685         gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
686         gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
687         gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
688         gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
689         gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
690         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
691
692         /*
693          * VPC corner case with local memory load kill leads to corrupt
694          * internal state. Normal Disable does not work for all a5x chips.
695          * So do the following setting to disable it.
696          */
697         if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
698                 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
699                 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
700         }
701
702         ret = adreno_hw_init(gpu);
703         if (ret)
704                 return ret;
705
706         if (!adreno_is_a510(adreno_gpu))
707                 a5xx_gpmu_ucode_init(gpu);
708
709         ret = a5xx_ucode_init(gpu);
710         if (ret)
711                 return ret;
712
713         /* Set the ringbuffer address */
714         gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
715                 gpu->rb[0]->iova);
716
717         gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
718                 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
719
720         a5xx_preempt_hw_init(gpu);
721
722         /* Enable the RBBM interrupts that we want to service */
723         gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
724
725         /* Clear ME_HALT to start the micro engine */
726         gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
727         ret = a5xx_me_init(gpu);
728         if (ret)
729                 return ret;
730
731         ret = a5xx_power_init(gpu);
732         if (ret)
733                 return ret;
734
735         /*
736          * Send a pipeline event stat to get misbehaving counters to start
737          * ticking correctly
738          */
739         if (adreno_is_a530(adreno_gpu)) {
740                 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
741                 OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
742
743                 gpu->funcs->flush(gpu, gpu->rb[0]);
744                 if (!a5xx_idle(gpu, gpu->rb[0]))
745                         return -EINVAL;
746         }
747
748         /*
749          * If the chip we are using supports loading a zap shader, try to
750          * load one into the secure world. If successful we can use the CP
751          * to switch out of secure mode. If not, we have no recourse but to
752          * try to switch ourselves out manually. If we guessed wrong then
753          * access to the RBBM_SECVID_TRUST_CNTL register will be blocked and
754          * a permissions violation will soon follow.
755          */
756         ret = a5xx_zap_shader_init(gpu);
757         if (!ret) {
758                 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
759                 OUT_RING(gpu->rb[0], 0x00000000);
760
761                 gpu->funcs->flush(gpu, gpu->rb[0]);
762                 if (!a5xx_idle(gpu, gpu->rb[0]))
763                         return -EINVAL;
764         } else if (ret == -ENODEV) {
765                 /*
766                  * This device does not use zap shader (but print a warning
767                  * just in case someone got their dt wrong.. hopefully they
768                  * have a debug UART to realize the error of their ways...
769                  * if you mess this up you are about to crash horribly)
770                  */
771                 dev_warn_once(gpu->dev->dev,
772                         "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
773                 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
774         } else {
775                 return ret;
776         }
777
778         /* Last step - yield the ringbuffer */
779         a5xx_preempt_start(gpu);
780
781         return 0;
782 }
783
784 static void a5xx_recover(struct msm_gpu *gpu)
785 {
786         int i;
787
788         adreno_dump_info(gpu);
789
790         for (i = 0; i < 8; i++) {
791                 printk("CP_SCRATCH_REG%d: %u\n", i,
792                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
793         }
794
795         if (hang_debug)
796                 a5xx_dump(gpu);
797
798         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
799         gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
800         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
801         adreno_recover(gpu);
802 }
803
804 static void a5xx_destroy(struct msm_gpu *gpu)
805 {
806         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
807         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
808
809         DBG("%s", gpu->name);
810
811         a5xx_preempt_fini(gpu);
812
813         if (a5xx_gpu->pm4_bo) {
814                 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
815                 drm_gem_object_put(a5xx_gpu->pm4_bo);
816         }
817
818         if (a5xx_gpu->pfp_bo) {
819                 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
820                 drm_gem_object_put(a5xx_gpu->pfp_bo);
821         }
822
823         if (a5xx_gpu->gpmu_bo) {
824                 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
825                 drm_gem_object_put(a5xx_gpu->gpmu_bo);
826         }
827
828         adreno_gpu_cleanup(adreno_gpu);
829         kfree(a5xx_gpu);
830 }
831
832 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
833 {
834         if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
835                 return false;
836
837         /*
838          * Nearly every abnormality ends up pausing the GPU and triggering a
839          * fault so we can safely just watch for this one interrupt to fire
840          */
841         return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
842                 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
843 }
844
845 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
846 {
847         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
848         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
849
850         if (ring != a5xx_gpu->cur_ring) {
851                 WARN(1, "Tried to idle a non-current ringbuffer\n");
852                 return false;
853         }
854
855         /* wait for CP to drain ringbuffer: */
856         if (!adreno_idle(gpu, ring))
857                 return false;
858
859         if (spin_until(_a5xx_check_idle(gpu))) {
860                 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
861                         gpu->name, __builtin_return_address(0),
862                         gpu_read(gpu, REG_A5XX_RBBM_STATUS),
863                         gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
864                         gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
865                         gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
866                 return false;
867         }
868
869         return true;
870 }
871
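/*
 * IOMMU pagefault handler: log the faulting IOVA along with a few CP scratch
 * registers that userspace may use as debug markers.
 */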
872 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
873 {
874         struct msm_gpu *gpu = arg;
875         pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
876                         iova, flags,
877                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
878                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
879                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
880                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
881
882         return -EFAULT;
883 }
884
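/* Decode and log the individual CP error interrupt sources */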
885 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
886 {
887         u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
888
889         if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
890                 u32 val;
891
892                 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
893
894                 /*
895                  * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
896                  * read it twice
897                  */
898
899                 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
900                 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
901
902                 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
903                         val);
904         }
905
906         if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
907                 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
908                         gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
909
910         if (status & A5XX_CP_INT_CP_DMA_ERROR)
911                 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
912
913         if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
914                 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
915
916                 dev_err_ratelimited(gpu->dev->dev,
917                         "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
918                         val & (1 << 24) ? "WRITE" : "READ",
919                         (val & 0xFFFFF) >> 2, val);
920         }
921
922         if (status & A5XX_CP_INT_CP_AHB_ERROR) {
923                 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
924                 const char *access[16] = { "reserved", "reserved",
925                         "timestamp lo", "timestamp hi", "pfp read", "pfp write",
926                         "", "", "me read", "me write", "", "", "crashdump read",
927                         "crashdump write" };
928
929                 dev_err_ratelimited(gpu->dev->dev,
930                         "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
931                         status & 0xFFFFF, access[(status >> 24) & 0xF],
932                         (status & (1 << 31)), status);
933         }
934 }
935
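/*
 * Decode and log RBBM error interrupts. The status is passed in by a5xx_irq(),
 * which has already cleared everything except the AHB error.
 */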
936 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
937 {
938         if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
939                 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
940
941                 dev_err_ratelimited(gpu->dev->dev,
942                         "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
943                         val & (1 << 28) ? "WRITE" : "READ",
944                         (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
945                         (val >> 24) & 0xF);
946
947                 /* Clear the error */
948                 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
949
950                 /* Clear the interrupt */
951                 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
952                         A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
953         }
954
955         if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
956                 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
957
958         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
959                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
960                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
961
962         if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
963                 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
964                         gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
965
966         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
967                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
968                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
969
970         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
971                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
972
973         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
974                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
975 }
976
977 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
978 {
979         uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
980
981         addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
982
983         dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
984                 addr);
985 }
986
987 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
988 {
989         dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
990 }
991
992 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
993 {
994         struct drm_device *dev = gpu->dev;
995         struct msm_drm_private *priv = dev->dev_private;
996         struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
997
998         DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
999                 ring ? ring->id : -1, ring ? ring->seqno : 0,
1000                 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1001                 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1002                 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1003                 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1004                 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1005                 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1006                 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1007
1008         /* Turn off the hangcheck timer to keep it from bothering us */
1009         del_timer(&gpu->hangcheck_timer);
1010
1011         queue_work(priv->wq, &gpu->recover_work);
1012 }
1013
1014 #define RBBM_ERROR_MASK \
1015         (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1016         A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1017         A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1018         A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1019         A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1020         A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1021
1022 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1023 {
1024         u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1025
1026         /*
1027          * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1028          * before the source is cleared the interrupt will storm.
1029          */
1030         gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1031                 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1032
1033         /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1034         if (status & RBBM_ERROR_MASK)
1035                 a5xx_rbbm_err_irq(gpu, status);
1036
1037         if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1038                 a5xx_cp_err_irq(gpu);
1039
1040         if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1041                 a5xx_fault_detect_irq(gpu);
1042
1043         if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1044                 a5xx_uche_err_irq(gpu);
1045
1046         if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1047                 a5xx_gpmu_err_irq(gpu);
1048
1049         if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1050                 a5xx_preempt_trigger(gpu);
1051                 msm_gpu_retire(gpu);
1052         }
1053
1054         if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1055                 a5xx_preempt_irq(gpu);
1056
1057         return IRQ_HANDLED;
1058 }
1059
1060 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1061         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1062         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1063         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1064         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1065                 REG_A5XX_CP_RB_RPTR_ADDR_HI),
1066         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1067         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1068         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1069 };
1070
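/*
 * Register ranges (inclusive start/end pairs, terminated by ~0) captured for
 * register dumps and GPU state snapshots.
 */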
1071 static const u32 a5xx_registers[] = {
1072         0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1073         0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1074         0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1075         0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1076         0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1077         0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1078         0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1079         0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1080         0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1081         0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1082         0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1083         0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1084         0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1085         0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1086         0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1087         0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1088         0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1089         0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1090         0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1091         0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1092         0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1093         0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1094         0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1095         0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1096         0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1097         0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1098         0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1099         0xAC60, 0xAC60, ~0,
1100 };
1101
1102 static void a5xx_dump(struct msm_gpu *gpu)
1103 {
1104         DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1105                 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1106         adreno_dump(gpu);
1107 }
1108
1109 static int a5xx_pm_resume(struct msm_gpu *gpu)
1110 {
1111         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1112         int ret;
1113
1114         /* Turn on the core power */
1115         ret = msm_gpu_pm_resume(gpu);
1116         if (ret)
1117                 return ret;
1118
1119         if (adreno_is_a510(adreno_gpu)) {
1120                 /* Halt the sp_input_clk at HM level */
1121                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1122                 a5xx_set_hwcg(gpu, true);
1123                 /* Turn on sp_input_clk at HM level */
1124                 gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1125                 return 0;
1126         }
1127
1128         /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1129         gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1130
1131         /* Wait 3 usecs before polling */
1132         udelay(3);
1133
1134         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1135                 (1 << 20), (1 << 20));
1136         if (ret) {
1137                 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1138                         gpu->name,
1139                         gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1140                 return ret;
1141         }
1142
1143         /* Turn on the SP domain */
1144         gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1145         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1146                 (1 << 20), (1 << 20));
1147         if (ret)
1148                 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1149                         gpu->name);
1150
1151         return ret;
1152 }
1153
1154 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1155 {
1156         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1157         u32 mask = 0xf;
1158
1159         /* A510 has 3 XIN ports in VBIF */
1160         if (adreno_is_a510(adreno_gpu))
1161                 mask = 0x7;
1162
1163         /* Clear the VBIF pipe before shutting down */
1164         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1165         spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1166                                 mask) == mask);
1167
1168         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1169
1170         /*
1171          * Reset the VBIF before power collapse to avoid issue with FIFO
1172          * entries
1173          */
1174         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1175         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1176
1177         return msm_gpu_pm_suspend(gpu);
1178 }
1179
1180 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1181 {
1182         *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1183                 REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1184
1185         return 0;
1186 }
1187
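/*
 * The CP "crashdumper" executes a small script out of GPU memory: each entry
 * pairs a 64-bit value (or target address) with an encoded register offset
 * and dword count. It is used below to read registers that the CPU cannot
 * access directly on secure platforms.
 */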
1188 struct a5xx_crashdumper {
1189         void *ptr;
1190         struct drm_gem_object *bo;
1191         u64 iova;
1192 };
1193
1194 struct a5xx_gpu_state {
1195         struct msm_gpu_state base;
1196         u32 *hlsqregs;
1197 };
1198
1199 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1200                 struct a5xx_crashdumper *dumper)
1201 {
1202         dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1203                 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1204                 &dumper->bo, &dumper->iova);
1205
1206         if (!IS_ERR(dumper->ptr))
1207                 msm_gem_object_set_name(dumper->bo, "crashdump");
1208
1209         return PTR_ERR_OR_ZERO(dumper->ptr);
1210 }
1211
1212 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1213                 struct a5xx_crashdumper *dumper)
1214 {
1215         u32 val;
1216
1217         if (IS_ERR_OR_NULL(dumper->ptr))
1218                 return -EINVAL;
1219
1220         gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1221                 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1222
1223         gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1224
1225         return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1226                 val & 0x04, 100, 10000);
1227 }
1228
1229 /*
1230  * A list of the registers that need to be read through the HLSQ aperture
1231  * via the crashdumper, since they are not normally accessible from the CPU
1232  * on a secure platform.
1233  */
1234 static const struct {
1235         u32 type;
1236         u32 regoffset;
1237         u32 count;
1238 } a5xx_hlsq_aperture_regs[] = {
1239         { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1240         { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1241         { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1242         { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1243         { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1244         { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1245         { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1246         { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1247         { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1248         { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1249         { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1250         { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1251         { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1252         { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1253         { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1254 };
1255
1256 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1257                 struct a5xx_gpu_state *a5xx_state)
1258 {
1259         struct a5xx_crashdumper dumper = { 0 };
1260         u32 offset, count = 0;
1261         u64 *ptr;
1262         int i;
1263
1264         if (a5xx_crashdumper_init(gpu, &dumper))
1265                 return;
1266
1267         /* The script will be written at offset 0 */
1268         ptr = dumper.ptr;
1269
1270         /* Start writing the data at offset 256k */
1271         offset = dumper.iova + (256 * SZ_1K);
1272
1273         /* Count how many additional registers to get from the HLSQ aperture */
1274         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1275                 count += a5xx_hlsq_aperture_regs[i].count;
1276
1277         a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1278         if (!a5xx_state->hlsqregs)
1279                 return;
1280
1281         /* Build the crashdump script */
1282         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1283                 u32 type = a5xx_hlsq_aperture_regs[i].type;
1284                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1285
1286                 /* Write the register to select the desired bank */
1287                 *ptr++ = ((u64) type << 8);
1288                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1289                         (1 << 21) | 1;
1290
1291                 *ptr++ = offset;
1292                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1293                         | c;
1294
1295                 offset += c * sizeof(u32);
1296         }
1297
1298         /* Write two zeros to close off the script */
1299         *ptr++ = 0;
1300         *ptr++ = 0;
1301
1302         if (a5xx_crashdumper_run(gpu, &dumper)) {
1303                 kfree(a5xx_state->hlsqregs);
1304                 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1305                 return;
1306         }
1307
1308         /* Copy the data from the crashdumper to the state */
1309         memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1310                 count * sizeof(u32));
1311
1312         msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1313 }
1314
1315 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1316 {
1317         struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1318                         GFP_KERNEL);
1319
1320         if (!a5xx_state)
1321                 return ERR_PTR(-ENOMEM);
1322
1323         /* Temporarily disable hardware clock gating before reading the hw */
1324         a5xx_set_hwcg(gpu, false);
1325
1326         /* First get the generic state from the adreno core */
1327         adreno_gpu_state_get(gpu, &(a5xx_state->base));
1328
1329         a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1330
1331         /* Get the HLSQ regs with the help of the crashdumper */
1332         a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1333
1334         a5xx_set_hwcg(gpu, true);
1335
1336         return &a5xx_state->base;
1337 }
1338
1339 static void a5xx_gpu_state_destroy(struct kref *kref)
1340 {
1341         struct msm_gpu_state *state = container_of(kref,
1342                 struct msm_gpu_state, ref);
1343         struct a5xx_gpu_state *a5xx_state = container_of(state,
1344                 struct a5xx_gpu_state, base);
1345
1346         kfree(a5xx_state->hlsqregs);
1347
1348         adreno_gpu_state_destroy(state);
1349         kfree(a5xx_state);
1350 }
1351
1352 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1353 {
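        /*
         * Treat a missing state as already released, mirroring kref_put()'s
         * "final reference dropped" return value.
         */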
1354         if (IS_ERR_OR_NULL(state))
1355                 return 1;
1356
1357         return kref_put(&state->ref, a5xx_gpu_state_destroy);
1358 }
1359
1360
1361 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1362 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1363                       struct drm_printer *p)
1364 {
1365         int i, j;
1366         u32 pos = 0;
1367         struct a5xx_gpu_state *a5xx_state = container_of(state,
1368                 struct a5xx_gpu_state, base);
1369
1370         if (IS_ERR_OR_NULL(state))
1371                 return;
1372
1373         adreno_show(gpu, state, p);
1374
1375         /* Dump the additional a5xx HLSQ registers */
1376         if (!a5xx_state->hlsqregs)
1377                 return;
1378
1379         drm_printf(p, "registers-hlsq:\n");
1380
1381         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1382                 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1383                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1384
1385                 for (j = 0; j < c; j++, pos++, o++) {
1386                         /*
1387                          * To keep the crashdump simple we pull the entire range
1388                          * for each register type, but not all of the registers
1389                          * in the range are valid. Fortunately, invalid registers
1390                          * stick out like a sore thumb with a value of
1391                          * 0xdeadbeef.
1392                          */
1393                         if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1394                                 continue;
1395
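                        /* The offset is in dwords; shift by two to print it in bytes */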
1396                         drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1397                                 o << 2, a5xx_state->hlsqregs[pos]);
1398                 }
1399         }
1400 }
1401 #endif
1402
1403 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1404 {
1405         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1406         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1407
1408         return a5xx_gpu->cur_ring;
1409 }
1410
1411 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1412 {
1413         u64 busy_cycles, busy_time;
1414
1415         /* Only read the GPU busy counter if the hardware is already active */
1416         if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1417                 return 0;
1418
1419         busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1420                         REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1421
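        /*
         * The performance counter is assumed to tick at the core clock rate,
         * so dividing the cycle delta by the clock rate in MHz yields the
         * busy time in microseconds.
         */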
1422         busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1423         do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1424
1425         gpu->devfreq.busy_cycles = busy_cycles;
1426
1427         pm_runtime_put(&gpu->pdev->dev);
1428
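        /* Guard against truncation where unsigned long is only 32 bits wide */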
1429         if (WARN_ON(busy_time > ~0LU))
1430                 return ~0LU;
1431
1432         return (unsigned long)busy_time;
1433 }
1434
1435 static const struct adreno_gpu_funcs funcs = {
1436         .base = {
1437                 .get_param = adreno_get_param,
1438                 .hw_init = a5xx_hw_init,
1439                 .pm_suspend = a5xx_pm_suspend,
1440                 .pm_resume = a5xx_pm_resume,
1441                 .recover = a5xx_recover,
1442                 .submit = a5xx_submit,
1443                 .flush = a5xx_flush,
1444                 .active_ring = a5xx_active_ring,
1445                 .irq = a5xx_irq,
1446                 .destroy = a5xx_destroy,
1447 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1448                 .show = a5xx_show,
1449 #endif
1450 #if defined(CONFIG_DEBUG_FS)
1451                 .debugfs_init = a5xx_debugfs_init,
1452 #endif
1453                 .gpu_busy = a5xx_gpu_busy,
1454                 .gpu_state_get = a5xx_gpu_state_get,
1455                 .gpu_state_put = a5xx_gpu_state_put,
1456                 .create_address_space = adreno_iommu_create_address_space,
1457         },
1458         .get_timestamp = a5xx_get_timestamp,
1459 };
1460
1461 static void check_speed_bin(struct device *dev)
1462 {
1463         struct nvmem_cell *cell;
1464         u32 val;
1465
1466         /*
1467          * If the OPP table specifies an opp-supported-hw property then we have
1468          * to set something with dev_pm_opp_set_supported_hw() or the table
1469          * doesn't get populated. Pick an arbitrary value that should ensure
1470          * the default frequencies are selected without conflicting with any
1471          * actual speed bins.
1472          */
1473         val = 0x80;
1474
1475         cell = nvmem_cell_get(dev, "speed_bin");
1476
1477         if (!IS_ERR(cell)) {
1478                 void *buf = nvmem_cell_read(cell, NULL);
1479
1480                 if (!IS_ERR(buf)) {
1481                         u8 bin = *((u8 *) buf);
1482
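                        /*
                         * Each fused bin selects a single bit; only OPP
                         * entries whose opp-supported-hw mask contains that
                         * bit stay enabled.  A fuse value of 2, for example,
                         * keeps entries with opp-supported-hw = <0x4>.
                         */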
1483                         val = (1 << bin);
1484                         kfree(buf);
1485                 }
1486
1487                 nvmem_cell_put(cell);
1488         }
1489
1490         dev_pm_opp_set_supported_hw(dev, &val, 1);
1491 }
1492
1493 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1494 {
1495         struct msm_drm_private *priv = dev->dev_private;
1496         struct platform_device *pdev = priv->gpu_pdev;
1497         struct a5xx_gpu *a5xx_gpu = NULL;
1498         struct adreno_gpu *adreno_gpu;
1499         struct msm_gpu *gpu;
1500         int ret;
1501
1502         if (!pdev) {
1503                 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1504                 return ERR_PTR(-ENXIO);
1505         }
1506
1507         a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1508         if (!a5xx_gpu)
1509                 return ERR_PTR(-ENOMEM);
1510
1511         adreno_gpu = &a5xx_gpu->base;
1512         gpu = &adreno_gpu->base;
1513
1514         adreno_gpu->registers = a5xx_registers;
1515         adreno_gpu->reg_offsets = a5xx_register_offsets;
1516
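        /* Default leakage value consumed by the GPMU limits management (LM) code */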
1517         a5xx_gpu->lm_leakage = 0x4E001A;
1518
1519         check_speed_bin(&pdev->dev);
1520
1521         /* Restricting nr_rings to 1 to temporarily disable preemption */
1522         ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
1523         if (ret) {
1524                 a5xx_destroy(&(a5xx_gpu->base.base));
1525                 return ERR_PTR(ret);
1526         }
1527
1528         if (gpu->aspace)
1529                 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1530
1531         /* Set up the preemption-specific bits and pieces for each ringbuffer */
1532         a5xx_preempt_init(gpu);
1533
1534         return gpu;
1535 }