1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
/* Module-wide hang-debug flag; defined in another translation unit. */
16 extern bool hang_debug;
/* Forward declaration — a5xx_dump() is defined later in this file. */
17 static void a5xx_dump(struct msm_gpu *gpu);
/*
 * a5xx_flush() - publish newly written ringbuffer contents to the CP.
 *
 * Advances ring->cur to ring->next under the ring lock, computes the
 * (possibly wrapped) write pointer, then pokes CP_RB_WPTR — but only when
 * this ring is the currently active one and no preemption is in flight,
 * since otherwise the hardware is not executing from this ring.
 */
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
23 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
/* Serialize ring->cur updates against concurrent submitters. */
28 spin_lock_irqsave(&ring->lock, flags);
30 /* Copy the shadow to the actual register */
31 ring->cur = ring->next;
33 /* Make sure to wrap wptr if we need to */
34 wptr = get_wptr(ring);
36 spin_unlock_irqrestore(&ring->lock, flags);
38 /* Make sure everything is posted before making a decision */
41 /* Update HW if this is the current ring and we are not in preempt */
42 if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
/*
 * a5xx_submit_in_rb() - "sudo" submit path: copy command buffers directly
 * into the ringbuffer instead of launching them as indirect buffers.
 * Used only when CONFIG_DRM_MSM_GPU_SUDO is enabled (see a5xx_submit()).
 * Waits for idle at the end because no IRQ-generating packet is
 * guaranteed to be present, then updates the fence memptr by hand.
 */
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47 struct msm_file_private *ctx)
49 struct msm_drm_private *priv = gpu->dev->dev_private;
50 struct msm_ringbuffer *ring = submit->ring;
51 struct msm_gem_object *obj;
52 uint32_t *ptr, dwords;
55 for (i = 0; i < submit->nr_cmds; i++) {
56 switch (submit->cmd[i].type) {
57 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
59 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60 if (priv->lastctx == ctx)
63 case MSM_SUBMIT_CMD_BUF:
64 /* copy commands into RB: */
65 obj = submit->bos[submit->cmd[i].idx].obj;
66 dwords = submit->cmd[i].size;
68 ptr = msm_gem_get_vaddr(&obj->base);
70 /* _get_vaddr() shouldn't fail at this point,
71 * since we've already mapped it once in
/* NOTE(review): this inner loop reuses the outer index 'i', which
 * clobbers the command iteration; harmless only if nr_cmds == 1 on
 * this path — confirm against the full source. */
77 for (i = 0; i < dwords; i++) {
78 /* normally the OUT_PKTn() would wait
79 * for space for the packet. But since
80 * we just OUT_RING() the whole thing,
81 * need to call adreno_wait_ring()
84 adreno_wait_ring(ring, 1);
85 OUT_RING(ring, ptr[i]);
88 msm_gem_put_vaddr(&obj->base);
94 a5xx_flush(gpu, ring);
95 a5xx_preempt_trigger(gpu);
97 /* we might not necessarily have a cmd from userspace to
98 * trigger an event to know that submit has completed, so
/* Poll until the CP drains, then retire the fence manually. */
101 a5xx_idle(gpu, ring);
102 ring->memptrs->fence = submit->seqno;
/*
 * a5xx_submit() - build and kick a normal command submission.
 *
 * Emits the full preemption-aware packet sequence into the ring:
 * save-record setup, IB launches, fence write via CACHE_FLUSH_TS (which
 * raises an IRQ so the submit can retire), and a CONTEXT_SWITCH_YIELD at
 * the end so a pending preemption can take the floor between submits.
 */
106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
107 struct msm_file_private *ctx)
109 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
110 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
111 struct msm_drm_private *priv = gpu->dev->dev_private;
112 struct msm_ringbuffer *ring = submit->ring;
113 unsigned int i, ibs = 0;
/* Debug-only fast path: copy cmds straight into the RB. */
115 if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
116 priv->lastctx = NULL;
117 a5xx_submit_in_rb(gpu, submit, ctx);
121 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
122 OUT_RING(ring, 0x02);
124 /* Turn off protected mode to write to special registers */
125 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
128 /* Set the save preemption record for the ring/command */
129 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
130 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
133 /* Turn back on protected mode */
134 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
137 /* Enable local preemption for finegrain preemption */
138 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
139 OUT_RING(ring, 0x02);
141 /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
142 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
143 OUT_RING(ring, 0x02);
145 /* Submit the commands */
146 for (i = 0; i < submit->nr_cmds; i++) {
147 switch (submit->cmd[i].type) {
148 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
/* Skip the context-restore IB when the same ctx submitted last. */
150 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
151 if (priv->lastctx == ctx)
154 case MSM_SUBMIT_CMD_BUF:
155 OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
156 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
157 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
158 OUT_RING(ring, submit->cmd[i].size);
165 * Write the render mode to NULL (0) to indicate to the CP that the IBs
166 * are done rendering - otherwise a lucky preemption would start
167 * replaying from the last checkpoint
169 OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
176 /* Turn off IB level preemptions */
177 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
178 OUT_RING(ring, 0x01);
180 /* Write the fence to the scratch register */
181 OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
182 OUT_RING(ring, submit->seqno);
185 * Execute a CACHE_FLUSH_TS event. This will ensure that the
186 * timestamp is written to the memory and then triggers the interrupt
188 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
189 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
190 CP_EVENT_WRITE_0_IRQ);
191 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
192 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
193 OUT_RING(ring, submit->seqno);
195 /* Yield the floor on command completion */
196 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
198 * If dword[2:1] are non zero, they specify an address for the CP to
199 * write the value of dword[3] to on preemption complete. Write 0 to
202 OUT_RING(ring, 0x00);
203 OUT_RING(ring, 0x00);
204 /* Data value - not used if the address above is 0 */
205 OUT_RING(ring, 0x01);
206 /* Set bit 0 to trigger an interrupt on preempt complete */
207 OUT_RING(ring, 0x01);
209 a5xx_flush(gpu, ring);
211 /* Check to see if we need to start preemption */
212 a5xx_preempt_trigger(gpu);
/*
 * Hardware clock-gating register/value table, applied (or zeroed) by
 * a5xx_set_hwcg(). Each entry pairs an RBBM_CLOCK_* control register
 * (CNTL/HYST/DELAY for the SP, TP, UCHE, RB, CCU, RAC, GPC, VFD, HLSQ
 * and TSE/RAS blocks) with its enable value.
 */
215 static const struct {
219 {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
220 {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
221 {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
222 {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
223 {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
224 {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
225 {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
226 {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
227 {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
228 {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
229 {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
230 {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
231 {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
232 {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
233 {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
234 {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
235 {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
236 {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
237 {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
238 {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
239 {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
240 {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
241 {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
242 {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
243 {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
244 {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
245 {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
246 {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
247 {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
248 {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
249 {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
250 {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
251 {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
252 {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
253 {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
254 {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
255 {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
256 {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
257 {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
258 {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
259 {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
260 {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
261 {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
262 {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
263 {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
264 {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
265 {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
266 {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
267 {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
268 {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
269 {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
270 {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
271 {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
272 {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
273 {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
274 {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
275 {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
276 {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
277 {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
278 {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
279 {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
280 {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
281 {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
282 {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
283 {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
284 {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
285 {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
286 {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
287 {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
288 {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
289 {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
290 {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
291 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
292 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
293 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
294 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
295 {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
296 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
297 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
298 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
299 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
300 {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
301 {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
302 {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
303 {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
304 {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
305 {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
306 {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
307 {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
308 {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
309 {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
310 {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
/*
 * a5xx_set_hwcg() - enable (state == true) or disable hardware clock
 * gating by walking the a5xx_hwcg table; disabling writes 0 to every
 * entry. A540 gets two extra GPMU gating registers, and the top-level
 * RBBM_CLOCK_CNTL / ISDB_CNT are flipped last.
 */
313 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
315 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
318 for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
319 gpu_write(gpu, a5xx_hwcg[i].offset,
320 state ? a5xx_hwcg[i].value : 0);
/* A540-only GPMU clock gating controls. */
322 if (adreno_is_a540(adreno_gpu)) {
323 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
324 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
327 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
328 gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
/*
 * a5xx_me_init() - send the CP_ME_INIT packet on ring 0 to initialize
 * the CP microengine, then wait for the GPU to go idle.
 *
 * Return: 0 on success, -EINVAL if the GPU fails to idle afterwards.
 */
331 static int a5xx_me_init(struct msm_gpu *gpu)
333 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
334 struct msm_ringbuffer *ring = gpu->rb[0];
336 OUT_PKT7(ring, CP_ME_INIT, 8);
338 OUT_RING(ring, 0x0000002F);
340 /* Enable multiple hardware contexts */
341 OUT_RING(ring, 0x00000003);
343 /* Enable error detection */
344 OUT_RING(ring, 0x20000000);
346 /* Don't enable header dump */
347 OUT_RING(ring, 0x00000000);
348 OUT_RING(ring, 0x00000000);
350 /* Specify workarounds for various microcode issues */
351 if (adreno_is_a530(adreno_gpu)) {
352 /* Workaround for token end syncs
353 * Force a WFI after every direct-render 3D mode draw and every
356 OUT_RING(ring, 0x0000000B);
357 } else if (adreno_is_a510(adreno_gpu)) {
358 /* Workaround for token and syncs */
359 OUT_RING(ring, 0x00000001);
361 /* No workarounds enabled */
362 OUT_RING(ring, 0x00000000);
365 OUT_RING(ring, 0x00000000);
366 OUT_RING(ring, 0x00000000);
368 gpu->funcs->flush(gpu, ring);
369 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
/*
 * a5xx_preempt_start() - prime the preemption machinery by issuing an
 * initial save-record setup and CONTEXT_SWITCH_YIELD on ring 0.
 * A no-op on single-ring configurations (nothing to preempt to).
 *
 * Return: 0 on success, -EINVAL if the GPU fails to idle afterwards.
 */
372 static int a5xx_preempt_start(struct msm_gpu *gpu)
374 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
375 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
376 struct msm_ringbuffer *ring = gpu->rb[0];
/* Preemption is pointless with only one ring. */
378 if (gpu->nr_rings == 1)
381 /* Turn off protected mode to write to special registers */
382 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
385 /* Set the save preemption record for the ring/command */
386 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
387 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
388 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
390 /* Turn back on protected mode */
391 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
394 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
395 OUT_RING(ring, 0x00);
397 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
398 OUT_RING(ring, 0x01);
400 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
401 OUT_RING(ring, 0x01);
403 /* Yield the floor on command completion */
404 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
405 OUT_RING(ring, 0x00);
406 OUT_RING(ring, 0x00);
407 OUT_RING(ring, 0x01);
408 OUT_RING(ring, 0x01);
410 gpu->funcs->flush(gpu, ring);
412 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
/*
 * a5xx_ucode_init() - lazily create GEM buffers for the PM4 and PFP
 * microcode images and program their IOVAs into the CP instruction
 * base registers. The BOs are created once and reused on subsequent
 * calls (e.g. after resume); on allocation failure the cached pointer
 * is cleared so a later call can retry.
 */
415 static int a5xx_ucode_init(struct msm_gpu *gpu)
417 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
418 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
421 if (!a5xx_gpu->pm4_bo) {
422 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
423 adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
426 if (IS_ERR(a5xx_gpu->pm4_bo)) {
427 ret = PTR_ERR(a5xx_gpu->pm4_bo);
428 a5xx_gpu->pm4_bo = NULL;
429 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
434 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
437 if (!a5xx_gpu->pfp_bo) {
438 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
439 adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
441 if (IS_ERR(a5xx_gpu->pfp_bo)) {
442 ret = PTR_ERR(a5xx_gpu->pfp_bo);
443 a5xx_gpu->pfp_bo = NULL;
444 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
449 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
/* Tell the CP where to fetch each microcode image from. */
452 gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
453 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
455 gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
456 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
/* SCM "state" argument asking TZ to re-start an already-loaded zap shader. */
461 #define SCM_GPU_ZAP_SHADER_RESUME 0
/*
 * a5xx_zap_shader_resume() - ask the secure world (via SCM) to resume
 * the previously loaded zap shader; logs on failure.
 */
463 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
467 ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
469 DRM_ERROR("%s: zap-shader resume failed: %d\n",
/*
 * a5xx_zap_shader_init() - load the zap shader into the secure world,
 * or just resume it if it was loaded on a previous init cycle.
 */
475 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
481 * If the zap shader is already loaded into memory we just need to kick
482 * the remote processor to reinitialize it
485 return a5xx_zap_shader_resume(gpu);
487 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
/*
 * Interrupt sources unmasked in RBBM_INT_0_MASK during hw_init: the
 * various hardware error/timeout conditions, hang detect, CP software
 * interrupt (used by preemption), the cache-flush timestamp IRQ that
 * retires submits, and GPMU voltage droop.
 */
493 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
494 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
495 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
496 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
497 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
498 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
499 A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
500 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
501 A5XX_RBBM_INT_0_MASK_CP_SW | \
502 A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
503 A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
504 A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
/*
 * a5xx_hw_init() - full hardware bring-up sequence for the a5xx family.
 *
 * Programs VBIF/QoS, performance counters, UCHE ranges, per-variant CP
 * thresholds and errata workarounds, clock gating, CP register
 * protection and the SECVID (secure video) configuration; then loads
 * microcode, starts the microengine, initializes power/GPMU, handles
 * the zap shader (or falls back to clearing SECVID_TRUST_CNTL), and
 * finally kicks preemption. The exact register ordering is deliberate —
 * do not reorder without consulting the hardware documentation.
 */
506 static int a5xx_hw_init(struct msm_gpu *gpu)
508 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
511 gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
513 if (adreno_is_a540(adreno_gpu))
514 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
516 /* Make all blocks contribute to the GPU BUSY perf counter */
517 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
519 /* Enable RBBM error reporting bits */
520 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
522 if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
524 * Mask out the activity signals from RB1-3 to avoid false
528 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
530 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
532 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
534 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
536 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
538 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
540 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
542 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
546 /* Enable fault detection */
547 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
550 /* Turn on performance counters */
551 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
553 /* Select CP0 to always count cycles */
554 gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
556 /* Select RBBM0 to countable 6 to get the busy status for devfreq */
557 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
559 /* Increase VFD cache access so LRZ and other data gets evicted less */
560 gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
562 /* Disable L2 bypass in the UCHE */
563 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
564 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
565 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
566 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
568 /* Set the GMEM VA range (0 to gpu->gmem) */
569 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
570 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
571 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
572 0x00100000 + adreno_gpu->gmem - 1);
573 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
/* CP queue depths differ per variant: A510 is smaller than A530/A540. */
575 if (adreno_is_a510(adreno_gpu)) {
576 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
577 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
578 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
579 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
580 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
581 (0x200 << 11 | 0x200 << 22));
583 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
584 if (adreno_is_a530(adreno_gpu))
585 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
586 if (adreno_is_a540(adreno_gpu))
587 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
588 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
589 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
590 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
591 (0x400 << 11 | 0x300 << 22));
594 if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
595 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
597 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
599 /* Enable USE_RETENTION_FLOPS */
600 gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
602 /* Enable ME/PFP split notification */
603 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
606 * In A5x, CCU can send context_done event of a particular context to
607 * UCHE which ultimately reaches CP even when there is valid
608 * transaction of that context inside CCU. This can let CP to program
609 * config registers, which will make the "valid transaction" inside
610 * CCU to be interpreted differently. This can cause gpu fault. This
611 * bug is fixed in latest A510 revision. To enable this bug fix -
612 * bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1
613 * (disable). For older A510 version this bit is unused.
615 if (adreno_is_a510(adreno_gpu))
616 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
619 a5xx_set_hwcg(gpu, true);
621 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
623 /* Set the highest bank bit */
624 gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
625 gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
626 if (adreno_is_a540(adreno_gpu))
627 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
629 /* Protect registers from the CP */
630 gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
633 gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
634 gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
635 gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
636 gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
637 gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
638 gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
640 /* Content protect */
641 gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
642 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
644 gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
645 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
648 gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
649 gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
650 gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
651 gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
654 gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
655 gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
658 gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
659 gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
662 gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
664 if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
665 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
666 ADRENO_PROTECT_RW(0x10000, 0x8000));
668 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
670 * Disable the trusted memory range - we don't actually supported secure
671 * memory rendering at this point in time and we don't want to block off
672 * part of the virtual memory space.
674 gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
675 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
676 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
678 /* Put the GPU into 64 bit by default */
679 gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
680 gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
681 gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
682 gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
683 gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
684 gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
685 gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
686 gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
687 gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
688 gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
689 gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
690 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
693 * VPC corner case with local memory load kill leads to corrupt
694 * internal state. Normal Disable does not work for all a5x chips.
695 * So do the following setting to disable it.
697 if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
698 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
699 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
702 ret = adreno_hw_init(gpu);
706 a5xx_preempt_hw_init(gpu);
/* A510 has no GPMU, so skip loading its microcode. */
708 if (!adreno_is_a510(adreno_gpu))
709 a5xx_gpmu_ucode_init(gpu);
711 ret = a5xx_ucode_init(gpu);
715 /* Disable the interrupts through the initial bringup stage */
716 gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
718 /* Clear ME_HALT to start the micro engine */
719 gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
720 ret = a5xx_me_init(gpu);
724 ret = a5xx_power_init(gpu);
729 * Send a pipeline event stat to get misbehaving counters to start
732 if (adreno_is_a530(adreno_gpu)) {
733 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
734 OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
736 gpu->funcs->flush(gpu, gpu->rb[0]);
737 if (!a5xx_idle(gpu, gpu->rb[0]))
742 * If the chip that we are using does support loading one, then
743 * try to load a zap shader into the secure world. If successful
744 * we can use the CP to switch out of secure mode. If not then we
745 * have no resource but to try to switch ourselves out manually. If we
746 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
747 * be blocked and a permissions violation will soon follow.
749 ret = a5xx_zap_shader_init(gpu);
751 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
752 OUT_RING(gpu->rb[0], 0x00000000);
754 gpu->funcs->flush(gpu, gpu->rb[0]);
755 if (!a5xx_idle(gpu, gpu->rb[0]))
757 } else if (ret == -ENODEV) {
759 * This device does not use zap shader (but print a warning
760 * just in case someone got their dt wrong.. hopefully they
761 * have a debug UART to realize the error of their ways...
762 * if you mess this up you are about to crash horribly)
764 dev_warn_once(gpu->dev->dev,
765 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
766 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
771 /* Last step - yield the ringbuffer */
772 a5xx_preempt_start(gpu);
/*
 * a5xx_recover() - GPU hang recovery: dump diagnostic state (including
 * the eight CP scratch registers), pulse RBBM_SW_RESET_CMD to hard-reset
 * the GPU, then hand off to the generic recovery path. The read between
 * the two writes forces the reset write to post before it is cleared.
 */
777 static void a5xx_recover(struct msm_gpu *gpu)
781 adreno_dump_info(gpu);
783 for (i = 0; i < 8; i++) {
784 printk("CP_SCRATCH_REG%d: %u\n", i,
785 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
791 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
792 gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
793 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
/*
 * a5xx_destroy() - tear down the a5xx GPU instance: stop preemption,
 * unpin and release the PM4/PFP/GPMU firmware BOs, then run the common
 * adreno cleanup.
 */
797 static void a5xx_destroy(struct msm_gpu *gpu)
799 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
800 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
802 DBG("%s", gpu->name);
804 a5xx_preempt_fini(gpu);
806 if (a5xx_gpu->pm4_bo) {
807 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
808 drm_gem_object_put(a5xx_gpu->pm4_bo);
811 if (a5xx_gpu->pfp_bo) {
812 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
813 drm_gem_object_put(a5xx_gpu->pfp_bo);
816 if (a5xx_gpu->gpmu_bo) {
817 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
818 drm_gem_object_put(a5xx_gpu->gpmu_bo);
821 adreno_gpu_cleanup(adreno_gpu);
/*
 * _a5xx_check_idle() - poll helper: true when RBBM_STATUS shows no busy
 * bits (other than HI_BUSY) and no hang-detect interrupt is pending.
 */
825 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
827 if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
831 * Nearly every abnormality ends up pausing the GPU and triggering a
832 * fault so we can safely just watch for this one interrupt to fire
834 return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
835 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
/*
 * a5xx_idle() - wait for the given ring to drain and the GPU to go
 * idle. Only valid for the currently active ring; warns otherwise.
 * Logs detailed state on timeout.
 */
838 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
840 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
841 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
843 if (ring != a5xx_gpu->cur_ring) {
844 WARN(1, "Tried to idle a non-current ringbuffer\n");
848 /* wait for CP to drain ringbuffer: */
849 if (!adreno_idle(gpu, ring))
852 if (spin_until(_a5xx_check_idle(gpu))) {
853 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
854 gpu->name, __builtin_return_address(0),
855 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
856 gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
857 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
858 gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
/*
 * a5xx_fault_handler() - IOMMU fault callback: rate-limited log of the
 * faulting IOVA plus CP scratch registers 4-7 for post-mortem triage.
 */
865 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
867 struct msm_gpu *gpu = arg;
868 pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
870 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
871 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
872 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
873 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
/*
 * a5xx_cp_err_irq() - decode and log CP error interrupts: opcode
 * errors (with the offending opcode fished out of the PFP stat FIFO),
 * HW faults, DMA errors, protected-mode violations and AHB errors.
 * All messages are rate-limited to avoid log flooding on a wedged GPU.
 */
878 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
880 u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
882 if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
885 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
888 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
/* First read advances the index; second read returns entry 1. */
892 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
893 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
895 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
899 if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
900 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
901 gpu_read(gpu, REG_A5XX_CP_HW_FAULT))
903 if (status & A5XX_CP_INT_CP_DMA_ERROR)
904 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
906 if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
907 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
909 dev_err_ratelimited(gpu->dev->dev,
910 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
911 val & (1 << 24) ? "WRITE" : "READ",
912 (val & 0xFFFFF) >> 2, val);
915 if (status & A5XX_CP_INT_CP_AHB_ERROR) {
916 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
917 const char *access[16] = { "reserved", "reserved",
918 "timestamp lo", "timestamp hi", "pfp read", "pfp write",
919 "", "", "me read", "me write", "", "", "crashdump read",
922 dev_err_ratelimited(gpu->dev->dev,
923 "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
924 status & 0xFFFFF, access[(status >> 24) & 0xF],
925 (status & (1 << 31)), status);
/*
 * a5xx_rbbm_err_irq() - decode and log RBBM error interrupts. The
 * caller passes in the already-read status word because the AHB error
 * bit must be cleared here (source first, then interrupt) rather than
 * in the generic clear in a5xx_irq() — see the comment there.
 */
929 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
931 if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
932 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
934 dev_err_ratelimited(gpu->dev->dev,
935 "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
936 val & (1 << 28) ? "WRITE" : "READ",
937 (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
940 /* Clear the error */
941 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
943 /* Clear the interrupt */
944 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
945 A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
948 if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
949 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
951 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
952 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
953 gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
955 if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
956 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
957 gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
959 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
960 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
961 gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
963 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
964 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
966 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
967 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
970 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
972 uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
974 addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
976 dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
980 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
982 dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
/*
 * a5xx_fault_detect_irq() - hang-detect interrupt: log a full snapshot
 * of the active ring, fence, RB pointers and both IB base/size pairs,
 * stop the hangcheck timer (recovery is taking over), and queue the
 * recover work.
 */
985 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
987 struct drm_device *dev = gpu->dev;
988 struct msm_drm_private *priv = dev->dev_private;
989 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
991 DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
992 ring ? ring->id : -1, ring ? ring->seqno : 0,
993 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
994 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
995 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
996 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
997 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
998 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
999 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1001 /* Turn off the hangcheck timer to keep it from bothering us */
1002 del_timer(&gpu->hangcheck_timer);
1004 queue_work(priv->wq, &gpu->recover_work);
/* All RBBM error/timeout interrupt bits routed to a5xx_rbbm_err_irq(). */
1007 #define RBBM_ERROR_MASK \
1008 (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1009 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1010 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1011 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1012 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1013 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
/*
 * a5xx_irq() - top-level GPU interrupt handler: read RBBM_INT_0_STATUS,
 * ack everything except the AHB error (which must be cleared at the
 * source first, in a5xx_rbbm_err_irq()), then dispatch each pending
 * source to its dedicated handler.
 */
1015 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1017 u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1020 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1021 * before the source is cleared the interrupt will storm.
1023 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1024 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1026 /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1027 if (status & RBBM_ERROR_MASK)
1028 a5xx_rbbm_err_irq(gpu, status);
1030 if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1031 a5xx_cp_err_irq(gpu);
1033 if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1034 a5xx_fault_detect_irq(gpu);
1036 if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1037 a5xx_uche_err_irq(gpu);
1039 if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1040 a5xx_gpmu_err_irq(gpu);
/* Timestamp IRQ: retire completed submits, maybe kick preemption. */
1042 if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1043 a5xx_preempt_trigger(gpu);
1044 msm_gpu_retire(gpu);
1047 if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1048 a5xx_preempt_irq(gpu);
/*
 * Map the generic REG_ADRENO_* register indices used by common adreno
 * code to their a5xx-specific register offsets (ringbuffer base,
 * rptr/wptr and control registers).
 */
static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A5XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
/*
 * Registers captured for debugfs/crash dumps, expressed as pairs of
 * inclusive (start, end) range bounds.  The ranges are not fully
 * sorted; they are grouped roughly by hardware block.
 */
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
/* Debug dump helper (used when hang_debug is set): print RBBM status. */
static void a5xx_dump(struct msm_gpu *gpu)
	DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
/*
 * Power up the GPU: core power first, then the GPMU-managed RBCCU and
 * SP power domains.  A510 has no GPMU-managed domains and only needs
 * its clock-gating setup.  Returns 0 on success or a negative errno.
 */
static int a5xx_pm_resume(struct msm_gpu *gpu)
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);

	if (adreno_is_a510(adreno_gpu)) {
		/* Halt the sp_input_clk at HM level */
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
		a5xx_set_hwcg(gpu, true);
		/* Turn on sp_input_clk at HM level */
		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);

	/* Turn the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */

	/* Poll bit 20 (GDSC power-on status) for up to 20us */
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
/*
 * Power down the GPU: halt the VBIF XIN ports, drain the pipe,
 * soft-reset the VBIF to avoid stale FIFO state across power collapse,
 * then drop core power.
 */
static int a5xx_pm_suspend(struct msm_gpu *gpu)
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	/* A510 has 3 XIN ports in VBIF */
	if (adreno_is_a510(adreno_gpu))

	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
	/* Wait until every halted XIN port acks in HALT_CTRL1 */
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &

	/* Release the halt again before powering off */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	return msm_gpu_pm_suspend(gpu);
/*
 * Read the 64-bit CP_0 performance counter as the GPU timestamp
 * (MSM_PARAM_TIMESTAMP backing store).
 */
static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
/*
 * State for the CP-driven crashdumper: a GEM buffer holding the dump
 * script (at offset 0) and the dumped register data.
 * NOTE(review): fields beyond 'bo' (kernel vaddr/iova) are not visible
 * in this view - confirm against the full file.
 */
struct a5xx_crashdumper {
	struct drm_gem_object *bo;
/*
 * a5xx-specific GPU crash state: the generic adreno state plus the
 * HLSQ-aperture registers captured via the crashdumper (hlsqregs).
 */
struct a5xx_gpu_state {
	struct msm_gpu_state base;
/*
 * Allocate the 1MB kernel-mapped GEM buffer used for the crashdump
 * script and its output.  Returns 0 on success or the PTR_ERR from the
 * failed allocation.
 */
static int a5xx_crashdumper_init(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
		&dumper->bo, &dumper->iova);

	/* Name the BO so it is identifiable in debugfs */
	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
/*
 * Point the CP at the crashdump script, kick the dump, and poll
 * CRASH_DUMP_CNTL for the done bit (0x04), up to 10ms.
 */
static int a5xx_crashdumper_run(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)

	/* Bail if the dump buffer was never successfully allocated */
	if (IS_ERR_OR_NULL(dumper->ptr))

	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	/* Writing 1 starts the CP crash dump engine */
	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
		val & 0x04, 100, 10000);
/*
 * These are a list of the registers that need to be read through the HLSQ
 * aperture through the crashdumper. These are not nominally accessible from
 * the CPU on a secure platform.
 * Each entry: aperture bank selector (type), register offset within the
 * bank, and the number of consecutive registers to read.
 */
static const struct {
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HSLQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
/*
 * Capture the HLSQ-aperture registers with the crashdumper: build a
 * script of (value, target) pairs that selects each aperture bank and
 * copies its registers into the dump buffer, run it, then copy the
 * results into a5xx_state->hlsqregs.  Best effort - on any failure the
 * state is simply left without HLSQ registers.
 */
static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
		struct a5xx_gpu_state *a5xx_state)
	struct a5xx_crashdumper dumper = { 0 };
	u32 offset, count = 0;

	if (a5xx_crashdumper_init(gpu, &dumper))

	/* The script will be written at offset 0 */

	/* Start writing the data at offset 256k */
	offset = dumper.iova + (256 * SZ_1K);

	/* Count how many additional registers to get from the HLSQ aperture */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
		count += a5xx_hlsq_aperture_regs[i].count;

	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
	if (!a5xx_state->hlsqregs)

	/* Build the crashdump script */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 type = a5xx_hlsq_aperture_regs[i].type;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		/* Write the register to select the desired bank */
		*ptr++ = ((u64) type << 8);
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |

		/* Then dump 'c' registers from the read aperture to 'offset' */
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)

		offset += c * sizeof(u32);

	/* Write two zeros to close off the script */

	if (a5xx_crashdumper_run(gpu, &dumper)) {
		/* Dump failed: drop the partially-filled buffer */
		kfree(a5xx_state->hlsqregs);
		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);

	/* Copy the data from the crashdumper to the state */
	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
		count * sizeof(u32));

	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
/*
 * Capture a full GPU state snapshot for coredump/debugfs.  Clock
 * gating is disabled while reading so the register reads are coherent,
 * and re-enabled afterwards.
 */
static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),

		return ERR_PTR(-ENOMEM);

	/* Temporarily disable hardware clock gating before reading the hw */
	a5xx_set_hwcg(gpu, false);

	/* First get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &(a5xx_state->base));

	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);

	/* Get the HLSQ regs with the help of the crashdumper */
	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);

	a5xx_set_hwcg(gpu, true);

	return &a5xx_state->base;
/*
 * kref release callback for a captured state: free the a5xx-specific
 * HLSQ register copy, then let the adreno core free the rest.
 */
static void a5xx_gpu_state_destroy(struct kref *kref)
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	/* kfree(NULL) is a no-op, so no need to check hlsqregs first */
	kfree(a5xx_state->hlsqregs);

	adreno_gpu_state_destroy(state);
/*
 * Drop a reference on a captured state; destroys it via
 * a5xx_gpu_state_destroy() when the last reference goes away.
 */
static int a5xx_gpu_state_put(struct msm_gpu_state *state)
	if (IS_ERR_OR_NULL(state))

	return kref_put(&state->ref, a5xx_gpu_state_destroy);
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
/*
 * Print a captured GPU state: the generic adreno state followed by the
 * extra a5xx HLSQ-aperture registers (if the crashdumper captured any).
 */
static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)

	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	if (IS_ERR_OR_NULL(state))

	adreno_show(gpu, state, p);

	/* Dump the additional a5xx HLSQ registers */
	if (!a5xx_state->hlsqregs)

	drm_printf(p, "registers-hlsq:\n");

	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		for (j = 0; j < c; j++, pos++, o++) {
			/*
			 * To keep the crashdump simple we pull the entire range
			 * for each register type but not all of the registers
			 * in the range are valid. Fortunately invalid registers
			 * stick out like a sore thumb with a value of
			 * 0xdeadbeef
			 */
			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)

			/* o is a dword index; << 2 converts to byte offset */
			drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
				o << 2, a5xx_state->hlsqregs[pos]);
1396 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1398 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1399 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1401 return a5xx_gpu->cur_ring;
/*
 * devfreq busy-time callback: return the GPU busy time (in usecs)
 * accumulated since the previous call, derived from the free-running
 * RBBM_0 performance counter.
 */
static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
	u64 busy_cycles, busy_time;

	/* Only read the gpu busy if the hardware is already active */
	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)

	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	/* Cycles since last sample, divided by MHz, gives microseconds */
	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);

	gpu->devfreq.busy_cycles = busy_cycles;

	pm_runtime_put(&gpu->pdev->dev);

	/* Guard against u64 -> unsigned long truncation on 32-bit */
	if (WARN_ON(busy_time > ~0LU))

	return (unsigned long)busy_time;
/* a5xx implementation of the adreno/msm GPU function table. */
static const struct adreno_gpu_funcs funcs = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.flush = a5xx_flush,
		.active_ring = a5xx_active_ring,
		.destroy = a5xx_destroy,
		/* show/debugfs hooks are only built with debug support */
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
#if defined(CONFIG_DEBUG_FS)
		.debugfs_init = a5xx_debugfs_init,
		.gpu_busy = a5xx_gpu_busy,
		.gpu_state_get = a5xx_gpu_state_get,
		.gpu_state_put = a5xx_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
	.get_timestamp = a5xx_get_timestamp,
/*
 * Read the speed-bin fuse from nvmem (if present) and use it to tell
 * the OPP layer which opp-supported-hw entries apply to this part.
 * Best effort: missing cell or failed read falls back to the default.
 */
static void check_speed_bin(struct device *dev)
	struct nvmem_cell *cell;

	/*
	 * If the OPP table specifies a opp-supported-hw property then we have
	 * to set something with dev_pm_opp_set_supported_hw() or the table
	 * doesn't get populated so pick an arbitrary value that should
	 * ensure the default frequencies are selected but not conflict with any
	 * actual bins
	 */

	cell = nvmem_cell_get(dev, "speed_bin");

	if (!IS_ERR(cell)) {
		/* nvmem_cell_read allocates; must be kfree'd after use */
		void *buf = nvmem_cell_read(cell, NULL);

			/* The fuse stores the bin number; supported-hw is a bitmask */
			u8 bin = *((u8 *) buf);

		nvmem_cell_put(cell);

	dev_pm_opp_set_supported_hw(dev, &val, 1);
1486 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1488 struct msm_drm_private *priv = dev->dev_private;
1489 struct platform_device *pdev = priv->gpu_pdev;
1490 struct a5xx_gpu *a5xx_gpu = NULL;
1491 struct adreno_gpu *adreno_gpu;
1492 struct msm_gpu *gpu;
1496 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1497 return ERR_PTR(-ENXIO);
1500 a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1502 return ERR_PTR(-ENOMEM);
1504 adreno_gpu = &a5xx_gpu->base;
1505 gpu = &adreno_gpu->base;
1507 adreno_gpu->registers = a5xx_registers;
1508 adreno_gpu->reg_offsets = a5xx_register_offsets;
1510 a5xx_gpu->lm_leakage = 0x4E001A;
1512 check_speed_bin(&pdev->dev);
1514 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1516 a5xx_destroy(&(a5xx_gpu->base.base));
1517 return ERR_PTR(ret);
1521 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1523 /* Set up the preemption specific bits and pieces for each ringbuffer */
1524 a5xx_preempt_init(gpu);