drivers/gpu/drm/msm/adreno/a5xx_gpu.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/firmware/qcom/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18
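/*
 * Peripheral Authentication Service ID for the GPU, passed to the SCM
 * calls that load and resume the zap shader (see a5xx_zap_shader_init()).
 */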
19 #define GPU_PAS_ID 13
20
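/*
 * Ask the CP to update the rptr shadow: CP_WHERE_AM_I makes the CP write
 * its current read pointer to the per-ring shadow buffer. Only available
 * when the microcode supports it (see a5xx_ucode_check_version()).
 */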
21 static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25
26         if (a5xx_gpu->has_whereami) {
27                 OUT_PKT7(ring, CP_WHERE_AM_I, 2);
28                 OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
29                 OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
30         }
31 }
32
33 void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
34                 bool sync)
35 {
36         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
37         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
38         uint32_t wptr;
39         unsigned long flags;
40
41         /*
42          * Most flush operations need to issue a WHERE_AM_I opcode to sync up
43          * the rptr shadow
44          */
45         if (sync)
46                 update_shadow_rptr(gpu, ring);
47
48         spin_lock_irqsave(&ring->preempt_lock, flags);
49
50         /* Copy the shadow to the actual register */
51         ring->cur = ring->next;
52
53         /* Make sure to wrap wptr if we need to */
54         wptr = get_wptr(ring);
55
56         spin_unlock_irqrestore(&ring->preempt_lock, flags);
57
58         /* Make sure everything is posted before making a decision */
59         mb();
60
61         /* Update HW if this is the current ring and we are not in preempt */
62         if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
63                 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
64 }
65
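/*
 * "sudo" submit path (CONFIG_DRM_MSM_GPU_SUDO): the command stream is
 * copied straight into the ringbuffer rather than executed via indirect
 * buffers, and completion is handled synchronously below.
 */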
66 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
67 {
68         struct msm_ringbuffer *ring = submit->ring;
69         struct msm_gem_object *obj;
70         uint32_t *ptr, dwords;
71         unsigned int i, j;
72
73         for (i = 0; i < submit->nr_cmds; i++) {
74                 switch (submit->cmd[i].type) {
75                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
76                         break;
77                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
78                         if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
79                                 break;
80                         fallthrough;
81                 case MSM_SUBMIT_CMD_BUF:
82                         /* copy commands into RB: */
83                         obj = submit->bos[submit->cmd[i].idx].obj;
84                         dwords = submit->cmd[i].size;
85
86                         ptr = msm_gem_get_vaddr(&obj->base);
87
88                         /* _get_vaddr() shouldn't fail at this point,
89                          * since we've already mapped it once in
90                          * submit_reloc()
91                          */
92                         if (WARN_ON(!ptr))
93                                 return;
94
95                         for (j = 0; j < dwords; j++) {
96                                 /* normally the OUT_PKTn() would wait
97                                  * for space for the packet.  But since
98                                  * we just OUT_RING() the whole thing, we
99                                  * need to call adreno_wait_ring()
100                                  * ourselves:
101                                  */
102                                 adreno_wait_ring(ring, 1);
103                                 OUT_RING(ring, ptr[j]);
104                         }
105
106                         msm_gem_put_vaddr(&obj->base);
107
108                         break;
109                 }
110         }
111
112         a5xx_flush(gpu, ring, true);
113         a5xx_preempt_trigger(gpu);
114
115         /* we might not necessarily have a cmd from userspace to
116          * trigger an event to know that submit has completed, so
117          * do this manually:
118          */
119         a5xx_idle(gpu, ring);
120         ring->memptrs->fence = submit->seqno;
121         msm_gpu_retire(gpu);
122 }
123
124 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
125 {
126         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
127         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
128         struct msm_ringbuffer *ring = submit->ring;
129         unsigned int i, ibs = 0;
130
131         if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
132                 gpu->cur_ctx_seqno = 0;
133                 a5xx_submit_in_rb(gpu, submit);
134                 return;
135         }
136
137         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
138         OUT_RING(ring, 0x02);
139
140         /* Turn off protected mode to write to special registers */
141         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
142         OUT_RING(ring, 0);
143
144         /* Set the save preemption record for the ring/command */
145         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
146         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
147         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
148
149         /* Turn back on protected mode */
150         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
151         OUT_RING(ring, 1);
152
153         /* Enable local preemption for finegrain preemption */
154         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
155         OUT_RING(ring, 0x1);
156
157         /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
158         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
159         OUT_RING(ring, 0x02);
160
161         /* Submit the commands */
162         for (i = 0; i < submit->nr_cmds; i++) {
163                 switch (submit->cmd[i].type) {
164                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
165                         break;
166                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
167                         if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
168                                 break;
169                         fallthrough;
170                 case MSM_SUBMIT_CMD_BUF:
171                         OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
172                         OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
173                         OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
174                         OUT_RING(ring, submit->cmd[i].size);
175                         ibs++;
176                         break;
177                 }
178
179                 /*
180                  * Periodically update the shadow rptr if needed, so that we
181                  * can see partial progress of submits with a large number of
182                  * cmds, otherwise we could needlessly stall waiting for
183                  * ringbuffer state, simply due to looking at a shadow
184                  * rptr value that has not been updated
185                  */
186                 if ((ibs % 32) == 0)
187                         update_shadow_rptr(gpu, ring);
188         }
189
190         /*
191          * Write the render mode to NULL (0) to indicate to the CP that the IBs
192          * are done rendering - otherwise a lucky preemption would start
193          * replaying from the last checkpoint
194          */
195         OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
196         OUT_RING(ring, 0);
197         OUT_RING(ring, 0);
198         OUT_RING(ring, 0);
199         OUT_RING(ring, 0);
200         OUT_RING(ring, 0);
201
202         /* Turn off IB level preemptions */
203         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
204         OUT_RING(ring, 0x01);
205
206         /* Write the fence to the scratch register */
207         OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
208         OUT_RING(ring, submit->seqno);
209
210         /*
211          * Execute a CACHE_FLUSH_TS event. This will ensure that the
212          * timestamp is written to the memory and then triggers the interrupt
213          */
214         OUT_PKT7(ring, CP_EVENT_WRITE, 4);
215         OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
216                 CP_EVENT_WRITE_0_IRQ);
217         OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
218         OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
219         OUT_RING(ring, submit->seqno);
220
221         /* Yield the floor on command completion */
222         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
223         /*
224          * If dword[2:1] are non zero, they specify an address for the CP to
225          * write the value of dword[3] to on preemption complete. Write 0 to
226          * skip the write
227          */
228         OUT_RING(ring, 0x00);
229         OUT_RING(ring, 0x00);
230         /* Data value - not used if the address above is 0 */
231         OUT_RING(ring, 0x01);
232         /* Set bit 0 to trigger an interrupt on preempt complete */
233         OUT_RING(ring, 0x01);
234
235         /* A WHERE_AM_I packet is not needed after a YIELD */
236         a5xx_flush(gpu, ring, false);
237
238         /* Check to see if we need to start preemption */
239         a5xx_preempt_trigger(gpu);
240 }
241
242 static const struct adreno_five_hwcg_regs {
243         u32 offset;
244         u32 value;
245 } a5xx_hwcg[] = {
246         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
247         {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
248         {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
249         {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
250         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
251         {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
252         {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
253         {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
254         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
255         {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
256         {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
257         {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
258         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
259         {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
260         {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
261         {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
262         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
263         {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
264         {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
265         {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
266         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
267         {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
268         {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
269         {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
270         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
271         {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
272         {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
273         {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
274         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
275         {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
276         {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
277         {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
278         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
279         {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
280         {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
281         {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
282         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
283         {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
284         {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
285         {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
286         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
287         {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
288         {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
289         {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
290         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
291         {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
292         {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
293         {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
294         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
295         {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
296         {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
297         {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
298         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
299         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
300         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
301         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
302         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
303         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
304         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
305         {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
306         {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
307         {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
308         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
309         {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
310         {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
311         {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
312         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
313         {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
314         {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
315         {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
316         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
317         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
318         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
319         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
320         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
321         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
322         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
323         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
324         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
325         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
326         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
327         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
328         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
329         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
330         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
331         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
332         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
333         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
334         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
335         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
336         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
337         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
338 }, a50x_hwcg[] = {
339         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
340         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
341         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
342         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
343         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
344         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
345         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
346         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
347         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
348         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
349         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
350         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
351         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
352         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
353         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
354         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
355         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
356         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
357         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
358         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
359         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
360         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
361         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
362         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
363         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
364         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
365         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
366         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
367         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
368         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
369         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
370         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
371         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
372         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
373         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
374         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
375         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
376         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
377 }, a512_hwcg[] = {
378         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
379         {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
380         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
381         {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
382         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
383         {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
384         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
385         {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
386         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
387         {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
388         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
389         {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
390         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
391         {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
392         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
393         {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
394         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
395         {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
396         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
397         {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
398         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
399         {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
400         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
401         {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
402         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
403         {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
404         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
405         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
406         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
407         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
408         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
409         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
410         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
411         {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
412         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
413         {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
414         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
415         {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
416         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
417         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
418         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
419         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
420         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
421         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
422         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
423         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
424         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
425         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
426         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
427         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
428         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
429         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
430         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
431         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
432         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
433         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
434 };
435
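/*
 * Program hardware clock gating: pick the register/value table for this
 * GPU variant and either write the values (state == true) or zero the
 * registers out to disable clock gating (state == false).
 */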
436 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
437 {
438         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
439         const struct adreno_five_hwcg_regs *regs;
440         unsigned int i, sz;
441
442         if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu)) {
443                 regs = a50x_hwcg;
444                 sz = ARRAY_SIZE(a50x_hwcg);
445         } else if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) {
446                 regs = a512_hwcg;
447                 sz = ARRAY_SIZE(a512_hwcg);
448         } else {
449                 regs = a5xx_hwcg;
450                 sz = ARRAY_SIZE(a5xx_hwcg);
451         }
452
453         for (i = 0; i < sz; i++)
454                 gpu_write(gpu, regs[i].offset,
455                           state ? regs[i].value : 0);
456
457         if (adreno_is_a540(adreno_gpu)) {
458                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
459                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
460         }
461
462         gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
463         gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
464 }
465
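/*
 * Bring up the CP micro engine with CP_ME_INIT: enable multiple hardware
 * contexts, error detection and any microcode workarounds needed for this
 * GPU variant, then wait for the ring to drain.
 */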
466 static int a5xx_me_init(struct msm_gpu *gpu)
467 {
468         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
469         struct msm_ringbuffer *ring = gpu->rb[0];
470
471         OUT_PKT7(ring, CP_ME_INIT, 8);
472
473         OUT_RING(ring, 0x0000002F);
474
475         /* Enable multiple hardware contexts */
476         OUT_RING(ring, 0x00000003);
477
478         /* Enable error detection */
479         OUT_RING(ring, 0x20000000);
480
481         /* Don't enable header dump */
482         OUT_RING(ring, 0x00000000);
483         OUT_RING(ring, 0x00000000);
484
485         /* Specify workarounds for various microcode issues */
486         if (adreno_is_a506(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
487                 /* Workaround for token end syncs
488                  * Force a WFI after every direct-render 3D mode draw and every
489                  * 2D mode 3 draw
490                  */
491                 OUT_RING(ring, 0x0000000B);
492         } else if (adreno_is_a510(adreno_gpu)) {
493                 /* Workaround for token and syncs */
494                 OUT_RING(ring, 0x00000001);
495         } else {
496                 /* No workarounds enabled */
497                 OUT_RING(ring, 0x00000000);
498         }
499
500         OUT_RING(ring, 0x00000000);
501         OUT_RING(ring, 0x00000000);
502
503         a5xx_flush(gpu, ring, true);
504         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
505 }
506
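/*
 * Prime ring 0 with the preemption setup packets (save record address,
 * preemption enables, yield) so the CP has a valid preemption state
 * before the first real submit. Skipped when only one ring is in use.
 */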
507 static int a5xx_preempt_start(struct msm_gpu *gpu)
508 {
509         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
510         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
511         struct msm_ringbuffer *ring = gpu->rb[0];
512
513         if (gpu->nr_rings == 1)
514                 return 0;
515
516         /* Turn off protected mode to write to special registers */
517         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
518         OUT_RING(ring, 0);
519
520         /* Set the save preemption record for the ring/command */
521         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
522         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
523         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
524
525         /* Turn back on protected mode */
526         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
527         OUT_RING(ring, 1);
528
529         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
530         OUT_RING(ring, 0x00);
531
532         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
533         OUT_RING(ring, 0x01);
534
535         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
536         OUT_RING(ring, 0x01);
537
538         /* Yield the floor on command completion */
539         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
540         OUT_RING(ring, 0x00);
541         OUT_RING(ring, 0x00);
542         OUT_RING(ring, 0x01);
543         OUT_RING(ring, 0x01);
544
545         /* The WHERE_AM_I packet is not needed after a YIELD is issued */
546         a5xx_flush(gpu, ring, false);
547
548         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
549 }
550
551 static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
552                 struct drm_gem_object *obj)
553 {
554         u32 *buf = msm_gem_get_vaddr(obj);
555
556         if (IS_ERR(buf))
557                 return;
558
559         /*
560          * If the lowest nibble is 0xa that is an indication that this microcode
561          * has been patched. The actual version is in dword [3] but we only care
562          * about the patchlevel which is the lowest nibble of dword [3]
563          */
564         if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
565                 a5xx_gpu->has_whereami = true;
566
567         msm_gem_put_vaddr(obj);
568 }
569
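/*
 * Create the PM4 and PFP firmware buffers (once), and if the microcode
 * supports CP_WHERE_AM_I also allocate the per-ring rptr shadow buffer.
 * Otherwise fall back to a single ring and disable preemption.
 */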
570 static int a5xx_ucode_load(struct msm_gpu *gpu)
571 {
572         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
573         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
574         int ret;
575
576         if (!a5xx_gpu->pm4_bo) {
577                 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
578                         adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
579
580
581                 if (IS_ERR(a5xx_gpu->pm4_bo)) {
582                         ret = PTR_ERR(a5xx_gpu->pm4_bo);
583                         a5xx_gpu->pm4_bo = NULL;
584                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
585                                 ret);
586                         return ret;
587                 }
588
589                 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
590         }
591
592         if (!a5xx_gpu->pfp_bo) {
593                 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
594                         adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
595
596                 if (IS_ERR(a5xx_gpu->pfp_bo)) {
597                         ret = PTR_ERR(a5xx_gpu->pfp_bo);
598                         a5xx_gpu->pfp_bo = NULL;
599                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
600                                 ret);
601                         return ret;
602                 }
603
604                 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
605                 a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
606         }
607
608         if (a5xx_gpu->has_whereami) {
609                 if (!a5xx_gpu->shadow_bo) {
610                         a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
611                                 sizeof(u32) * gpu->nr_rings,
612                                 MSM_BO_WC | MSM_BO_MAP_PRIV,
613                                 gpu->aspace, &a5xx_gpu->shadow_bo,
614                                 &a5xx_gpu->shadow_iova);
615
616                         if (IS_ERR(a5xx_gpu->shadow))
617                                 return PTR_ERR(a5xx_gpu->shadow);
618
619                         msm_gem_object_set_name(a5xx_gpu->shadow_bo, "shadow");
620                 }
621         } else if (gpu->nr_rings > 1) {
622                 /* Disable preemption if WHERE_AM_I isn't available */
623                 a5xx_preempt_fini(gpu);
624                 gpu->nr_rings = 1;
625         }
626
627         return 0;
628 }
629
630 #define SCM_GPU_ZAP_SHADER_RESUME 0
631
632 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
633 {
634         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
635         int ret;
636
637         /*
638          * Adreno 506 has the CPZ retention feature and doesn't require
639          * the zap shader to be resumed
640          */
641         if (adreno_is_a506(adreno_gpu))
642                 return 0;
643
644         ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
645         if (ret)
646                 DRM_ERROR("%s: zap-shader resume failed: %d\n",
647                         gpu->name, ret);
648
649         return ret;
650 }
651
652 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
653 {
654         static bool loaded;
655         int ret;
656
657         /*
658          * If the zap shader is already loaded into memory we just need to kick
659          * the remote processor to reinitialize it
660          */
661         if (loaded)
662                 return a5xx_zap_shader_resume(gpu);
663
664         ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
665
666         loaded = !ret;
667         return ret;
668 }
669
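/* RBBM interrupts enabled at hw_init time and handled in a5xx_irq() */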
670 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
671           A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
672           A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
673           A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
674           A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
675           A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
676           A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
677           A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
678           A5XX_RBBM_INT_0_MASK_CP_SW | \
679           A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
680           A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
681           A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
682
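/*
 * Full hardware bringup: VBIF/RBBM setup, clock gating, protected
 * register ranges, firmware base addresses, ringbuffer and preemption
 * state, CP/GPMU start and (where applicable) zap shader init.
 */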
683 static int a5xx_hw_init(struct msm_gpu *gpu)
684 {
685         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
686         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
687         u32 regbit;
688         int ret;
689
690         gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
691
692         if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
693             adreno_is_a540(adreno_gpu))
694                 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
695
696         /* Make all blocks contribute to the GPU BUSY perf counter */
697         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
698
699         /* Enable RBBM error reporting bits */
700         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
701
702         if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
703                 /*
704                  * Mask out the activity signals from RB1-3 to avoid false
705                  * positives
706                  */
707
708                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
709                         0xF0000000);
710                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
711                         0xFFFFFFFF);
712                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
713                         0xFFFFFFFF);
714                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
715                         0xFFFFFFFF);
716                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
717                         0xFFFFFFFF);
718                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
719                         0xFFFFFFFF);
720                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
721                         0xFFFFFFFF);
722                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
723                         0xFFFFFFFF);
724         }
725
726         /* Enable fault detection */
727         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
728                 (1 << 30) | 0xFFFF);
729
730         /* Turn on performance counters */
731         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
732
733         /* Select CP0 to always count cycles */
734         gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
735
736         /* Select countable 6 for RBBM perfcounter 0 to get the busy status for devfreq */
737         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
738
739         /* Increase VFD cache access so LRZ and other data gets evicted less */
740         gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
741
742         /* Disable L2 bypass in the UCHE */
743         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
744         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
745         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
746         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
747
748         /* Set the GMEM VA range (base 0x00100000, size adreno_gpu->gmem) */
749         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
750         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
751         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
752                 0x00100000 + adreno_gpu->gmem - 1);
753         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
754
755         if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
756             adreno_is_a510(adreno_gpu)) {
757                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
758                 if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu))
759                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
760                 else
761                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
762                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
763                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
764         } else {
765                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
766                 if (adreno_is_a530(adreno_gpu))
767                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
768                 else
769                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
770                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
771                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
772         }
773
774         if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu))
775                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
776                           (0x100 << 11 | 0x100 << 22));
777         else if (adreno_is_a509(adreno_gpu) || adreno_is_a510(adreno_gpu) ||
778                  adreno_is_a512(adreno_gpu))
779                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
780                           (0x200 << 11 | 0x200 << 22));
781         else
782                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
783                           (0x400 << 11 | 0x300 << 22));
784
785         if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
786                 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
787
788         /*
789          * Disable the RB sampler datapath DP2 clock gating optimization
790          * for 1-SP GPUs, as it is enabled by default.
791          */
792         if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
793             adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu))
794                 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, 0, (1 << 9));
795
796         /* Disable UCHE global filter as SP can invalidate/flush independently */
797         gpu_write(gpu, REG_A5XX_UCHE_MODE_CNTL, BIT(29));
798
799         /* Enable USE_RETENTION_FLOPS */
800         gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
801
802         /* Enable ME/PFP split notification */
803         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
804
805         /*
806          *  In A5x, the CCU can send the context_done event of a particular
807          *  context to the UCHE, which ultimately reaches the CP, even while a
808          *  valid transaction of that context is still inside the CCU. This can
809          *  let the CP program config registers, which will make the "valid
810          *  transaction" inside the CCU be interpreted differently and can cause
811          *  a gpu fault. This bug is fixed in the latest A510 revision. To enable
812          *  the bug fix, bit[11] of RB_DBG_ECO_CNTL needs to be set to 0 (the
813          *  default is 1, i.e. disabled). For older A510 revisions this bit is unused.
814          */
815         if (adreno_is_a510(adreno_gpu))
816                 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
817
818         /* Enable HWCG */
819         a5xx_set_hwcg(gpu, true);
820
821         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
822
823         /* Set the highest bank bit */
824         if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu))
825                 regbit = 2;
826         else
827                 regbit = 1;
828
829         gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7);
830         gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1);
831
832         if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
833             adreno_is_a540(adreno_gpu))
834                 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit);
835
836         /* Disable All flat shading optimization (ALLFLATOPTDIS) */
837         gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));
838
839         /* Protect registers from the CP */
840         gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
841
842         /* RBBM */
843         gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
844         gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
845         gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
846         gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
847         gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
848         gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
849
850         /* Content protect */
851         gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
852                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
853                         16));
854         gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
855                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
856
857         /* CP */
858         gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
859         gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
860         gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
861         gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
862
863         /* RB */
864         gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
865         gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
866
867         /* VPC */
868         gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
869         gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 16));
870
871         /* UCHE */
872         gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
873
874         /* SMMU */
875         gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
876                         ADRENO_PROTECT_RW(0x10000, 0x8000));
877
878         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
879         /*
880          * Disable the trusted memory range - we don't actually support secure
881          * memory rendering at this point in time and we don't want to block off
882          * part of the virtual memory space.
883          */
884         gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
885         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
886
887         /* Put the GPU into 64 bit by default */
888         gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
889         gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
890         gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
891         gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
892         gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
893         gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
894         gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
895         gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
896         gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
897         gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
898         gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
899         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
900
901         /*
902          * A VPC corner case with local memory load kill leads to corrupt
903          * internal state. The normal disable does not work for all a5x chips,
904          * so apply the following settings to disable it.
905          */
906         if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
907                 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
908                 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
909         }
910
911         ret = adreno_hw_init(gpu);
912         if (ret)
913                 return ret;
914
915         if (adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))
916                 a5xx_gpmu_ucode_init(gpu);
917
918         gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova);
919         gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova);
920
921         /* Set the ringbuffer address */
922         gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova);
923
924         /*
925          * If the microcode supports the WHERE_AM_I opcode then we can use that
926          * in lieu of the RPTR shadow and enable preemption. Otherwise, we
927          * can't safely use the RPTR shadow or preemption. In either case, the
928          * RPTR shadow should be disabled in hardware.
929          */
930         gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
931                 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
932
933         /* Configure the RPTR shadow if needed: */
934         if (a5xx_gpu->shadow_bo) {
935                 gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
936                             shadowptr(a5xx_gpu, gpu->rb[0]));
937         }
938
939         a5xx_preempt_hw_init(gpu);
940
941         /* Disable the interrupts through the initial bringup stage */
942         gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
943
944         /* Clear ME_HALT to start the micro engine */
945         gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
946         ret = a5xx_me_init(gpu);
947         if (ret)
948                 return ret;
949
950         ret = a5xx_power_init(gpu);
951         if (ret)
952                 return ret;
953
954         /*
955          * Send a pipeline event stat to get misbehaving counters to start
956          * ticking correctly
957          */
958         if (adreno_is_a530(adreno_gpu)) {
959                 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
960                 OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
961
962                 a5xx_flush(gpu, gpu->rb[0], true);
963                 if (!a5xx_idle(gpu, gpu->rb[0]))
964                         return -EINVAL;
965         }
966
967         /*
968          * If the chip that we are using supports loading one, then try to
969          * load a zap shader into the secure world. If successful we can use
970          * the CP to switch out of secure mode. If not then we have no
971          * recourse but to try to switch ourselves out manually. If we
972          * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register
973          * will be blocked and a permissions violation will soon follow.
974          */
975         ret = a5xx_zap_shader_init(gpu);
976         if (!ret) {
977                 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
978                 OUT_RING(gpu->rb[0], 0x00000000);
979
980                 a5xx_flush(gpu, gpu->rb[0], true);
981                 if (!a5xx_idle(gpu, gpu->rb[0]))
982                         return -EINVAL;
983         } else if (ret == -ENODEV) {
984                 /*
985                  * This device does not use a zap shader (but print a warning
986                  * just in case someone got their dt wrong.. hopefully they
987                  * have a debug UART to realize the error of their ways...
988                  * if you mess this up you are about to crash horribly)
989                  */
990                 dev_warn_once(gpu->dev->dev,
991                         "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
992                 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
993         } else {
994                 return ret;
995         }
996
997         /* Last step - yield the ringbuffer */
998         a5xx_preempt_start(gpu);
999
1000         return 0;
1001 }
1002
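/*
 * GPU hang recovery: dump the CP scratch registers (and optionally the
 * full GPU state), pulse RBBM_SW_RESET_CMD to reset the GPU, then hand
 * off to the common adreno recovery path.
 */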
1003 static void a5xx_recover(struct msm_gpu *gpu)
1004 {
1005         int i;
1006
1007         adreno_dump_info(gpu);
1008
1009         for (i = 0; i < 8; i++) {
1010                 printk("CP_SCRATCH_REG%d: %u\n", i,
1011                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
1012         }
1013
1014         if (hang_debug)
1015                 a5xx_dump(gpu);
1016
1017         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
1018         gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
1019         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
1020         adreno_recover(gpu);
1021 }
1022
1023 static void a5xx_destroy(struct msm_gpu *gpu)
1024 {
1025         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1026         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1027
1028         DBG("%s", gpu->name);
1029
1030         a5xx_preempt_fini(gpu);
1031
1032         if (a5xx_gpu->pm4_bo) {
1033                 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
1034                 drm_gem_object_put(a5xx_gpu->pm4_bo);
1035         }
1036
1037         if (a5xx_gpu->pfp_bo) {
1038                 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
1039                 drm_gem_object_put(a5xx_gpu->pfp_bo);
1040         }
1041
1042         if (a5xx_gpu->gpmu_bo) {
1043                 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
1044                 drm_gem_object_put(a5xx_gpu->gpmu_bo);
1045         }
1046
1047         if (a5xx_gpu->shadow_bo) {
1048                 msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
1049                 drm_gem_object_put(a5xx_gpu->shadow_bo);
1050         }
1051
1052         adreno_gpu_cleanup(adreno_gpu);
1053         kfree(a5xx_gpu);
1054 }
1055
1056 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
1057 {
1058         if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
1059                 return false;
1060
1061         /*
1062          * Nearly every abnormality ends up pausing the GPU and triggering a
1063          * fault so we can safely just watch for this one interrupt to fire
1064          */
1065         return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
1066                 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
1067 }
1068
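/*
 * Wait for the CP to drain the given ring and for the GPU to go idle.
 * Returns false (and logs the current state) if the GPU fails to go
 * idle within the timeout.
 */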
1069 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1070 {
1071         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1072         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1073
1074         if (ring != a5xx_gpu->cur_ring) {
1075                 WARN(1, "Tried to idle a non-current ringbuffer\n");
1076                 return false;
1077         }
1078
1079         /* wait for CP to drain ringbuffer: */
1080         if (!adreno_idle(gpu, ring))
1081                 return false;
1082
1083         if (spin_until(_a5xx_check_idle(gpu))) {
1084                 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
1085                         gpu->name, __builtin_return_address(0),
1086                         gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1087                         gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
1088                         gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1089                         gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
1090                 return false;
1091         }
1092
1093         return true;
1094 }
1095
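/*
 * GPU SMMU fault handler: snapshot CP_SCRATCH_REG4..7 and pass them,
 * along with the IOMMU fault info, to the common adreno fault reporting
 * code.
 */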
1096 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1097 {
1098         struct msm_gpu *gpu = arg;
1099         struct adreno_smmu_fault_info *info = data;
1100         char block[12] = "unknown";
1101         u32 scratch[] = {
1102                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
1103                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
1104                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
1105                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)),
1106         };
1107
1108         if (info)
1109                 snprintf(block, sizeof(block), "%x", info->fsynr1);
1110
1111         return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1112 }
1113
1114 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
1115 {
1116         u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
1117
1118         if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
1119                 u32 val;
1120
1121                 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
1122
1123                 /*
1124                  * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
1125                  * read it twice
1126                  */
1127
1128                 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1129                 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1130
1131                 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
1132                         val);
1133         }
1134
1135         if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
1136                 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
1137                         gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
1138
1139         if (status & A5XX_CP_INT_CP_DMA_ERROR)
1140                 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
1141
1142         if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1143                 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
1144
1145                 dev_err_ratelimited(gpu->dev->dev,
1146                         "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1147                         val & (1 << 24) ? "WRITE" : "READ",
1148                         (val & 0xFFFFF) >> 2, val);
1149         }
1150
1151         if (status & A5XX_CP_INT_CP_AHB_ERROR) {
1152                 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
1153                 const char *access[16] = { "reserved", "reserved",
1154                         "timestamp lo", "timestamp hi", "pfp read", "pfp write",
1155                         "", "", "me read", "me write", "", "", "crashdump read",
1156                         "crashdump write" };
1157
1158                 dev_err_ratelimited(gpu->dev->dev,
1159                         "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
1160                         status & 0xFFFFF, access[(status >> 24) & 0xF],
1161                         (status & (1 << 31)), status);
1162         }
1163 }
1164
1165 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
1166 {
1167         if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
1168                 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
1169
1170                 dev_err_ratelimited(gpu->dev->dev,
1171                         "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
1172                         val & (1 << 28) ? "WRITE" : "READ",
1173                         (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
1174                         (val >> 24) & 0xF);
1175
1176                 /* Clear the error */
1177                 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
1178
1179                 /* Clear the interrupt */
1180                 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1181                         A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1182         }
1183
1184         if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
1185                 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
1186
1187         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
1188                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
1189                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
1190
1191         if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
1192                 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
1193                         gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
1194
1195         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1196                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1197                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1198
1199         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1200                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1201
1202         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1203                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1204 }
1205
1206 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1207 {
1208         uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
1209
1210         addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1211
1212         dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1213                 addr);
1214 }
1215
1216 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1217 {
1218         dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1219 }
1220
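/*
 * Hang detect interrupt: log the CP/ringbuffer state and schedule
 * recovery, unless the GPU is merely stalled on an SMMU fault (that
 * case is handled by the fault handler instead).
 */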
1221 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1222 {
1223         struct drm_device *dev = gpu->dev;
1224         struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1225
1226         /*
1227          * If stalled on SMMU fault, we could trip the GPU's hang detection,
1228          * but the fault handler will trigger the devcore dump, and we want
1229          * to otherwise resume normally rather than killing the submit, so
1230          * just bail.
1231          */
1232         if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24))
1233                 return;
1234
1235         DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1236                 ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1237                 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1238                 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1239                 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1240                 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE),
1241                 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1242                 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE),
1243                 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1244
1245         /* Turn off the hangcheck timer to keep it from bothering us */
1246         del_timer(&gpu->hangcheck_timer);
1247
1248         kthread_queue_work(gpu->worker, &gpu->recover_work);
1249 }
1250
1251 #define RBBM_ERROR_MASK \
1252         (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1253         A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1254         A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1255         A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1256         A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1257         A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1258
1259 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1260 {
1261         struct msm_drm_private *priv = gpu->dev->dev_private;
1262         u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1263
1264         /*
1265          * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1266          * before the source is cleared the interrupt will storm.
1267          */
1268         gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1269                 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1270
1271         if (priv->disable_err_irq) {
1272                 status &= A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS |
1273                           A5XX_RBBM_INT_0_MASK_CP_SW;
1274         }
1275
1276         /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1277         if (status & RBBM_ERROR_MASK)
1278                 a5xx_rbbm_err_irq(gpu, status);
1279
1280         if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1281                 a5xx_cp_err_irq(gpu);
1282
1283         if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1284                 a5xx_fault_detect_irq(gpu);
1285
1286         if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1287                 a5xx_uche_err_irq(gpu);
1288
1289         if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1290                 a5xx_gpmu_err_irq(gpu);
1291
1292         if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1293                 a5xx_preempt_trigger(gpu);
1294                 msm_gpu_retire(gpu);
1295         }
1296
1297         if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1298                 a5xx_preempt_irq(gpu);
1299
1300         return IRQ_HANDLED;
1301 }
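/*
 * Illustrative sketch, not driver code: a5xx_irq() above follows the common
 * "latch the status word, ack it, then test each bit and dispatch" pattern.
 * A table-driven variant of the per-bit dispatch, using a hypothetical
 * handler table, might look like this.
 */
struct irq_bit_handler {
	u32 mask;
	void (*handler)(struct msm_gpu *gpu);
};

static void dispatch_status_bits(struct msm_gpu *gpu, u32 status,
		const struct irq_bit_handler *handlers, unsigned int count)
{
	unsigned int i;

	/* call every handler whose bit is set in the latched status word */
	for (i = 0; i < count; i++)
		if (status & handlers[i].mask)
			handlers[i].handler(gpu);
}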
1302
1303 static const u32 a5xx_registers[] = {
1304         0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1305         0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1306         0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1307         0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1308         0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1309         0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1310         0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1311         0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1312         0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1313         0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1314         0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1315         0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1316         0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1317         0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1318         0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1319         0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1320         0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1321         0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1322         0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1323         0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1324         0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1325         0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1326         0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1327         0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1328         0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1329         0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1330         0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1331         0xAC60, 0xAC60, ~0,
1332 };
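/*
 * Illustrative sketch, not driver code: a5xx_registers[] above is a flat
 * array of inclusive { start, end } dword-offset pairs terminated by ~0,
 * which the adreno core walks when dumping register state.  A generic
 * walker over such a list, with a hypothetical per-register callback:
 */
static void walk_register_ranges(const u32 *ranges,
		void (*read_one)(u32 offset))
{
	unsigned int i;

	for (i = 0; ranges[i] != ~0U; i += 2) {
		u32 offset;

		/* ranges come in inclusive pairs: start, end, start, end, ... */
		for (offset = ranges[i]; offset <= ranges[i + 1]; offset++)
			read_one(offset);
	}
}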
1333
1334 static void a5xx_dump(struct msm_gpu *gpu)
1335 {
1336         DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1337                 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1338         adreno_dump(gpu);
1339 }
1340
1341 static int a5xx_pm_resume(struct msm_gpu *gpu)
1342 {
1343         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1344         int ret;
1345
1346         /* Turn on the core power */
1347         ret = msm_gpu_pm_resume(gpu);
1348         if (ret)
1349                 return ret;
1350
1351         /* Adreno A506, A508, A509, A510 and A512 need manual RBBM suspend/resume control */
1352         if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))) {
1353                 /* Halt the sp_input_clk at HM level */
1354                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1355                 a5xx_set_hwcg(gpu, true);
1356                 /* Turn on sp_input_clk at HM level */
1357                 gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1358                 return 0;
1359         }
1360
1361         /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1362         gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1363
1364         /* Wait 3 usecs before polling */
1365         udelay(3);
1366
1367         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1368                 (1 << 20), (1 << 20));
1369         if (ret) {
1370                 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1371                         gpu->name,
1372                         gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1373                 return ret;
1374         }
1375
1376         /* Turn on the SP domain */
1377         gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1378         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1379                 (1 << 20), (1 << 20));
1380         if (ret)
1381                 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1382                         gpu->name);
1383
1384         return ret;
1385 }
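/*
 * Illustrative sketch, not driver code: the GDSC enable sequence above uses
 * the driver's spin_usecs() helper to wait for a power-status bit.  The
 * general shape of such a bounded poll, written out explicitly with a fixed
 * 10 us step (the step size is an arbitrary choice for illustration), is:
 */
static int poll_reg_bits(struct msm_gpu *gpu, u32 reg, u32 mask, u32 expect,
		unsigned int timeout_us)
{
	unsigned int waited;

	for (waited = 0; waited < timeout_us; waited += 10) {
		if ((gpu_read(gpu, reg) & mask) == expect)
			return 0;
		udelay(10);
	}

	/* one final check in case the bit flipped during the last delay */
	return ((gpu_read(gpu, reg) & mask) == expect) ? 0 : -ETIMEDOUT;
}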
1386
1387 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1388 {
1389         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1390         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1391         u32 mask = 0xf;
1392         int i, ret;
1393
1394         /* A506, A508, A510 have 3 XIN ports in VBIF */
1395         if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
1396             adreno_is_a510(adreno_gpu))
1397                 mask = 0x7;
1398
1399         /* Clear the VBIF pipe before shutting down */
1400         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1401         spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1402                                 mask) == mask);
1403
1404         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1405
1406         /*
1407          * Reset the VBIF before power collapse to avoid issues with FIFO
1408          * entries on Adreno A510 and A530 (the others will tend to lock up)
1409          */
1410         if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
1411                 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1412                 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1413         }
1414
1415         ret = msm_gpu_pm_suspend(gpu);
1416         if (ret)
1417                 return ret;
1418
1419         if (a5xx_gpu->has_whereami)
1420                 for (i = 0; i < gpu->nr_rings; i++)
1421                         a5xx_gpu->shadow[i] = 0;
1422
1423         return 0;
1424 }
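/*
 * Illustrative sketch, not driver code: the VBIF shutdown above is a
 * request/ack/release handshake - write the halt mask, wait for the same
 * bits to be acknowledged, then clear the request.  In generic form, with
 * the request and ack register offsets passed in:
 */
static void halt_and_release(struct msm_gpu *gpu, u32 req_reg, u32 ack_reg,
		u32 mask)
{
	/* request the halt */
	gpu_write(gpu, req_reg, mask);

	/* wait until every requested port acknowledges the halt */
	spin_until((gpu_read(gpu, ack_reg) & mask) == mask);

	/* release the halt request again */
	gpu_write(gpu, req_reg, 0);
}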
1425
1426 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1427 {
1428         *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO);
1429
1430         return 0;
1431 }
1432
1433 struct a5xx_crashdumper {
1434         void *ptr;
1435         struct drm_gem_object *bo;
1436         u64 iova;
1437 };
1438
1439 struct a5xx_gpu_state {
1440         struct msm_gpu_state base;
1441         u32 *hlsqregs;
1442 };
1443
1444 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1445                 struct a5xx_crashdumper *dumper)
1446 {
1447         dumper->ptr = msm_gem_kernel_new(gpu->dev,
1448                 SZ_1M, MSM_BO_WC, gpu->aspace,
1449                 &dumper->bo, &dumper->iova);
1450
1451         if (!IS_ERR(dumper->ptr))
1452                 msm_gem_object_set_name(dumper->bo, "crashdump");
1453
1454         return PTR_ERR_OR_ZERO(dumper->ptr);
1455 }
1456
1457 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1458                 struct a5xx_crashdumper *dumper)
1459 {
1460         u32 val;
1461
1462         if (IS_ERR_OR_NULL(dumper->ptr))
1463                 return -EINVAL;
1464
1465         gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
1466
1467         gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1468
1469         return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1470                 val & 0x04, 100, 10000);
1471 }
1472
1473 /*
1474  * This is a list of the registers that need to be read through the HLSQ
1475  * aperture via the crashdumper.  These are not normally accessible from
1476  * the CPU on a secure platform.
1477  */
1478 static const struct {
1479         u32 type;
1480         u32 regoffset;
1481         u32 count;
1482 } a5xx_hlsq_aperture_regs[] = {
1483         { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1484         { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1485         { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1486         { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1487         { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1488         { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1489         { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1490         { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1491         { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1492         { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1493         { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1494         { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1495         { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1496         { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1497         { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1498 };
1499
1500 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1501                 struct a5xx_gpu_state *a5xx_state)
1502 {
1503         struct a5xx_crashdumper dumper = { 0 };
1504         u32 offset, count = 0;
1505         u64 *ptr;
1506         int i;
1507
1508         if (a5xx_crashdumper_init(gpu, &dumper))
1509                 return;
1510
1511         /* The script will be written at offset 0 */
1512         ptr = dumper.ptr;
1513
1514         /* Start writing the data at offset 256k */
1515         offset = dumper.iova + (256 * SZ_1K);
1516
1517         /* Count how many additional registers to get from the HLSQ aperture */
1518         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1519                 count += a5xx_hlsq_aperture_regs[i].count;
1520
1521         a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1522         if (!a5xx_state->hlsqregs)
1523                 return;
1524
1525         /* Build the crashdump script */
1526         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1527                 u32 type = a5xx_hlsq_aperture_regs[i].type;
1528                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1529
1530                 /* Write the register to select the desired bank */
1531                 *ptr++ = ((u64) type << 8);
1532                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1533                         (1 << 21) | 1;
1534
1535                 *ptr++ = offset;
1536                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1537                         | c;
1538
1539                 offset += c * sizeof(u32);
1540         }
1541
1542         /* Write two zeros to close off the script */
1543         *ptr++ = 0;
1544         *ptr++ = 0;
1545
1546         if (a5xx_crashdumper_run(gpu, &dumper)) {
1547                 kfree(a5xx_state->hlsqregs);
1548                 msm_gem_kernel_put(dumper.bo, gpu->aspace);
1549                 return;
1550         }
1551
1552         /* Copy the data from the crashdumper to the state */
1553         memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1554                 count * sizeof(u32));
1555
1556         msm_gem_kernel_put(dumper.bo, gpu->aspace);
1557 }
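/*
 * Illustrative sketch, not driver code: the crashdump script built above is
 * a sequence of 64-bit pairs.  Based purely on the code above (treat the
 * exact bit layout as an assumption about the CP crashdumper, not as
 * documentation), the two entry kinds could be wrapped in small packing
 * helpers:
 */
static u64 *script_write_reg(u64 *ptr, u32 reg, u64 value)
{
	/* "write one dword 'value' to register 'reg'" */
	*ptr++ = value;
	*ptr++ = ((u64)reg << 44) | (1 << 21) | 1;

	return ptr;
}

static u64 *script_read_block(u64 *ptr, u32 reg, u64 dest_iova, u32 count)
{
	/* "read 'count' dwords from register 'reg' into memory at 'dest_iova'" */
	*ptr++ = dest_iova;
	*ptr++ = ((u64)reg << 44) | count;

	return ptr;
}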
1558
1559 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1560 {
1561         struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1562                         GFP_KERNEL);
1563         bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24));
1564
1565         if (!a5xx_state)
1566                 return ERR_PTR(-ENOMEM);
1567
1568         /* Temporarily disable hardware clock gating before reading the hw */
1569         a5xx_set_hwcg(gpu, false);
1570
1571         /* First get the generic state from the adreno core */
1572         adreno_gpu_state_get(gpu, &(a5xx_state->base));
1573
1574         a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1575
1576         /*
1577          * Get the HLSQ regs with the help of the crashdumper, but only if
1578          * we are not stalled in an iommu fault (in which case the crashdumper
1579          * would not have access to memory)
1580          */
1581         if (!stalled)
1582                 a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1583
1584         a5xx_set_hwcg(gpu, true);
1585
1586         return &a5xx_state->base;
1587 }
1588
1589 static void a5xx_gpu_state_destroy(struct kref *kref)
1590 {
1591         struct msm_gpu_state *state = container_of(kref,
1592                 struct msm_gpu_state, ref);
1593         struct a5xx_gpu_state *a5xx_state = container_of(state,
1594                 struct a5xx_gpu_state, base);
1595
1596         kfree(a5xx_state->hlsqregs);
1597
1598         adreno_gpu_state_destroy(state);
1599         kfree(a5xx_state);
1600 }
1601
1602 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1603 {
1604         if (IS_ERR_OR_NULL(state))
1605                 return 1;
1606
1607         return kref_put(&state->ref, a5xx_gpu_state_destroy);
1608 }
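/*
 * Illustrative sketch, not driver code: the GPU state object is reference
 * counted via the struct kref embedded in msm_gpu_state, so the last
 * a5xx_gpu_state_put() ends up calling a5xx_gpu_state_destroy().  The same
 * pattern on a hypothetical standalone object:
 */
struct refcounted_obj {
	struct kref ref;
	/* ... payload ... */
};

static void refcounted_obj_release(struct kref *kref)
{
	/* called exactly once, when the final reference is dropped */
	kfree(container_of(kref, struct refcounted_obj, ref));
}

static int refcounted_obj_put(struct refcounted_obj *obj)
{
	/* returns nonzero if this call released the object */
	return kref_put(&obj->ref, refcounted_obj_release);
}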
1609
1610
1611 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1612 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1613                       struct drm_printer *p)
1614 {
1615         int i, j;
1616         u32 pos = 0;
1617         struct a5xx_gpu_state *a5xx_state = container_of(state,
1618                 struct a5xx_gpu_state, base);
1619
1620         if (IS_ERR_OR_NULL(state))
1621                 return;
1622
1623         adreno_show(gpu, state, p);
1624
1625         /* Dump the additional a5xx HLSQ registers */
1626         if (!a5xx_state->hlsqregs)
1627                 return;
1628
1629         drm_printf(p, "registers-hlsq:\n");
1630
1631         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1632                 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1633                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1634
1635                 for (j = 0; j < c; j++, pos++, o++) {
1636                         /*
1637                          * To keep the crashdump simple we pull the entire range
1638                          * for each register type, but not all of the registers
1639                          * in the range are valid. Fortunately invalid registers
1640                          * stick out like a sore thumb with a value of
1641                          * 0xdeadbeef
1642                          */
1643                         if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1644                                 continue;
1645
1646                         drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1647                                 o << 2, a5xx_state->hlsqregs[pos]);
1648                 }
1649         }
1650 }
1651 #endif
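/*
 * Illustrative sketch, not driver code: the per-register body of the loop in
 * a5xx_show() above can be read as a small filter-and-print step - dword
 * offsets are printed as byte offsets ("o << 2") and slots still holding the
 * crashdumper's 0xdeadbeef fill pattern are skipped.
 */
static void print_hlsq_reg(struct drm_printer *p, u32 dword_offset, u32 value)
{
	/* unimplemented registers in the dumped ranges read back as 0xdeadbeef */
	if (value == 0xdeadbeef)
		return;

	/* convert the dword offset to a byte offset for the printout */
	drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
		dword_offset << 2, value);
}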
1652
1653 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1654 {
1655         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1656         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1657
1658         return a5xx_gpu->cur_ring;
1659 }
1660
1661 static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
1662 {
1663         u64 busy_cycles;
1664
1665         busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO);
1666         *out_sample_rate = clk_get_rate(gpu->core_clk);
1667
1668         return busy_cycles;
1669 }
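/*
 * Illustrative sketch, not driver code: the core devfreq code combines the
 * cycle counter and sample rate returned by a5xx_gpu_busy() into a busy-time
 * estimate.  Roughly (an assumption about the consumer, not the exact
 * upstream math), a cycle delta over one sampling period converts to busy
 * microseconds as cycles / (cycles per second), scaled by 10^6:
 */
static u64 busy_time_us(u64 cycle_delta, unsigned long rate_hz)
{
	/* guard against a zero rate before dividing; assumes the per-sample
	 * delta is small enough that the scaling does not overflow 64 bits
	 */
	if (!rate_hz)
		return 0;

	return div64_u64(cycle_delta * 1000000, rate_hz);
}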
1670
1671 static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1672 {
1673         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1674         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1675
1676         if (a5xx_gpu->has_whereami)
1677                 return a5xx_gpu->shadow[ring->id];
1678
1679         return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
1680 }
1681
1682 static const struct adreno_gpu_funcs funcs = {
1683         .base = {
1684                 .get_param = adreno_get_param,
1685                 .set_param = adreno_set_param,
1686                 .hw_init = a5xx_hw_init,
1687                 .ucode_load = a5xx_ucode_load,
1688                 .pm_suspend = a5xx_pm_suspend,
1689                 .pm_resume = a5xx_pm_resume,
1690                 .recover = a5xx_recover,
1691                 .submit = a5xx_submit,
1692                 .active_ring = a5xx_active_ring,
1693                 .irq = a5xx_irq,
1694                 .destroy = a5xx_destroy,
1695 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1696                 .show = a5xx_show,
1697 #endif
1698 #if defined(CONFIG_DEBUG_FS)
1699                 .debugfs_init = a5xx_debugfs_init,
1700 #endif
1701                 .gpu_busy = a5xx_gpu_busy,
1702                 .gpu_state_get = a5xx_gpu_state_get,
1703                 .gpu_state_put = a5xx_gpu_state_put,
1704                 .create_address_space = adreno_create_address_space,
1705                 .get_rptr = a5xx_get_rptr,
1706         },
1707         .get_timestamp = a5xx_get_timestamp,
1708 };
1709
1710 static void check_speed_bin(struct device *dev)
1711 {
1712         struct nvmem_cell *cell;
1713         u32 val;
1714
1715         /*
1716          * If the OPP table specifies an opp-supported-hw property then we have
1717          * to set something with dev_pm_opp_set_supported_hw() or the table
1718          * doesn't get populated, so pick an arbitrary value that should
1719          * ensure the default frequencies are selected but not conflict with
1720          * any actual bins.
1721          */
1722         val = 0x80;
1723
1724         cell = nvmem_cell_get(dev, "speed_bin");
1725
1726         if (!IS_ERR(cell)) {
1727                 void *buf = nvmem_cell_read(cell, NULL);
1728
1729                 if (!IS_ERR(buf)) {
1730                         u8 bin = *((u8 *) buf);
1731
1732                         val = (1 << bin);
1733                         kfree(buf);
1734                 }
1735
1736                 nvmem_cell_put(cell);
1737         }
1738
1739         devm_pm_opp_set_supported_hw(dev, &val, 1);
1740 }
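/*
 * Illustrative sketch, not driver code: with dev_pm_opp_set_supported_hw()
 * the OPP core keeps a device-tree OPP only if its opp-supported-hw value
 * intersects the bitmask chosen above (so fuse bin 2 yields 0x04 and enables
 * any OPP whose mask has bit 2 set).  Conceptually the per-OPP check is:
 */
static bool opp_is_supported(u32 opp_supported_hw, u32 chosen_mask)
{
	/* an OPP stays enabled if it advertises at least one matching bit */
	return (opp_supported_hw & chosen_mask) != 0;
}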
1741
1742 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1743 {
1744         struct msm_drm_private *priv = dev->dev_private;
1745         struct platform_device *pdev = priv->gpu_pdev;
1746         struct adreno_platform_config *config = pdev->dev.platform_data;
1747         struct a5xx_gpu *a5xx_gpu = NULL;
1748         struct adreno_gpu *adreno_gpu;
1749         struct msm_gpu *gpu;
1750         unsigned int nr_rings;
1751         int ret;
1752
1753         if (!pdev) {
1754                 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1755                 return ERR_PTR(-ENXIO);
1756         }
1757
1758         a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1759         if (!a5xx_gpu)
1760                 return ERR_PTR(-ENOMEM);
1761
1762         adreno_gpu = &a5xx_gpu->base;
1763         gpu = &adreno_gpu->base;
1764
1765         adreno_gpu->registers = a5xx_registers;
1766
1767         a5xx_gpu->lm_leakage = 0x4E001A;
1768
1769         check_speed_bin(&pdev->dev);
1770
1771         nr_rings = 4;
1772
1773         if (adreno_cmp_rev(ADRENO_REV(5, 1, 0, ANY_ID), config->rev))
1774                 nr_rings = 1;
1775
1776         ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, nr_rings);
1777         if (ret) {
1778                 a5xx_destroy(&(a5xx_gpu->base.base));
1779                 return ERR_PTR(ret);
1780         }
1781
1782         if (gpu->aspace)
1783                 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1784
1785         /* Set up the preemption specific bits and pieces for each ringbuffer */
1786         a5xx_preempt_init(gpu);
1787
1788         return gpu;
1789 }