2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
29 #include "radeon_asic.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
/* Firmware images the CIK support code may request at runtime; one set per
 * ASIC. Note Bonaire additionally lists MC and SMC images, which the
 * Kaveri/Kabini sets below do not. */
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
/* Helpers implemented in other radeon translation units (r600/evergreen/
 * sumo/si/cik_sdma). */
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 extern int cik_sdma_resume(struct radeon_device *rdev);
68 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69 extern void cik_sdma_fini(struct radeon_device *rdev);
/* NOTE(review): the cik_sdma_vm_set_page prototype below is missing its
 * middle parameter line(s) in this extract — confirm against the full file. */
70 extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
73 uint64_t addr, unsigned count,
74 uint32_t incr, uint32_t flags);
/* Forward declarations for static helpers defined later in this file. */
75 static void cik_rlc_stop(struct radeon_device *rdev);
76 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
77 static void cik_program_aspm(struct radeon_device *rdev);
78 static void cik_init_pg(struct radeon_device *rdev);
79 static void cik_init_cg(struct radeon_device *rdev);
80 static void cik_fini_pg(struct radeon_device *rdev);
81 static void cik_fini_cg(struct radeon_device *rdev);
/* NOTE(review): declaration below is cut off mid-parameter-list in this
 * extract (remaining arguments elided). */
82 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
/* ci_get_temp - read the current GPU temperature (CI/Bonaire-class parts)
 * via the CTF field of CG_MULT_THERMAL_STATUS over the SMC indirect bus.
 * NOTE(review): this extract is truncated — the function braces, the shift
 * amount, sign/overflow handling and the return statement are missing.
 * Do not edit without the complete source. */
85 /* get temperature in millidegrees */
86 int ci_get_temp(struct radeon_device *rdev)
91 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
97 actual_temp = temp & 0x1ff;
/* Convert degrees to millidegrees for the hwmon interface. */
99 actual_temp = actual_temp * 1000;
/* kv_get_temp - read the current GPU temperature on KV/Kabini-class parts
 * from SMC register 0xC0300E0C; the raw value is scaled by 1/8 with a -49
 * offset to get degrees C.
 * NOTE(review): truncated extract — braces, clamping and return are
 * missing; confirm against the full file before editing. */
104 /* get temperature in millidegrees */
105 int kv_get_temp(struct radeon_device *rdev)
110 temp = RREG32_SMC(0xC0300E0C);
113 actual_temp = (temp / 8) - 49;
/* Convert degrees to millidegrees for the hwmon interface. */
117 actual_temp = actual_temp * 1000;
/* cik_pciep_rreg - read a PCIE port indirect register: program PCIE_INDEX,
 * read it back to post the write, then read PCIE_DATA. The pciep_idx_lock
 * spinlock serializes the index/data pair against concurrent accessors.
 * NOTE(review): truncated extract — variable declarations, opening brace
 * and the return of 'r' are missing. */
123 * Indirect registers accessor
125 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
130 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
131 WREG32(PCIE_INDEX, reg);
132 (void)RREG32(PCIE_INDEX);
133 r = RREG32(PCIE_DATA);
134 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
/* cik_pciep_wreg - write a PCIE port indirect register: program PCIE_INDEX,
 * write PCIE_DATA, reading each back to post the writes, all under the same
 * pciep_idx_lock used by cik_pciep_rreg.
 * NOTE(review): truncated extract — the 'flags' declaration and braces are
 * missing. */
138 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
142 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
143 WREG32(PCIE_INDEX, reg);
144 (void)RREG32(PCIE_INDEX);
145 WREG32(PCIE_DATA, v);
146 (void)RREG32(PCIE_DATA);
147 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
/* RLC save/restore register list for Spectre (Kaveri GFX). Each entry
 * appears to pack a broadcast/instance selector in the upper 16 bits and a
 * dword register index (byte offset >> 2) in the lower 16 — TODO confirm
 * against the RLC microcode interface. NOTE(review): this extract is
 * truncated — the initializer's opening '{', closing '};' and roughly every
 * other entry are missing. Do not hand-edit; restore from the full file. */
150 static const u32 spectre_rlc_save_restore_register_list[] =
152 (0x0e00 << 16) | (0xc12c >> 2),
154 (0x0e00 << 16) | (0xc140 >> 2),
156 (0x0e00 << 16) | (0xc150 >> 2),
158 (0x0e00 << 16) | (0xc15c >> 2),
160 (0x0e00 << 16) | (0xc168 >> 2),
162 (0x0e00 << 16) | (0xc170 >> 2),
164 (0x0e00 << 16) | (0xc178 >> 2),
166 (0x0e00 << 16) | (0xc204 >> 2),
168 (0x0e00 << 16) | (0xc2b4 >> 2),
170 (0x0e00 << 16) | (0xc2b8 >> 2),
172 (0x0e00 << 16) | (0xc2bc >> 2),
174 (0x0e00 << 16) | (0xc2c0 >> 2),
176 (0x0e00 << 16) | (0x8228 >> 2),
178 (0x0e00 << 16) | (0x829c >> 2),
180 (0x0e00 << 16) | (0x869c >> 2),
182 (0x0600 << 16) | (0x98f4 >> 2),
184 (0x0e00 << 16) | (0x98f8 >> 2),
186 (0x0e00 << 16) | (0x9900 >> 2),
188 (0x0e00 << 16) | (0xc260 >> 2),
190 (0x0e00 << 16) | (0x90e8 >> 2),
192 (0x0e00 << 16) | (0x3c000 >> 2),
194 (0x0e00 << 16) | (0x3c00c >> 2),
196 (0x0e00 << 16) | (0x8c1c >> 2),
198 (0x0e00 << 16) | (0x9700 >> 2),
200 (0x0e00 << 16) | (0xcd20 >> 2),
202 (0x4e00 << 16) | (0xcd20 >> 2),
204 (0x5e00 << 16) | (0xcd20 >> 2),
206 (0x6e00 << 16) | (0xcd20 >> 2),
208 (0x7e00 << 16) | (0xcd20 >> 2),
210 (0x8e00 << 16) | (0xcd20 >> 2),
212 (0x9e00 << 16) | (0xcd20 >> 2),
214 (0xae00 << 16) | (0xcd20 >> 2),
216 (0xbe00 << 16) | (0xcd20 >> 2),
218 (0x0e00 << 16) | (0x89bc >> 2),
220 (0x0e00 << 16) | (0x8900 >> 2),
223 (0x0e00 << 16) | (0xc130 >> 2),
225 (0x0e00 << 16) | (0xc134 >> 2),
227 (0x0e00 << 16) | (0xc1fc >> 2),
229 (0x0e00 << 16) | (0xc208 >> 2),
231 (0x0e00 << 16) | (0xc264 >> 2),
233 (0x0e00 << 16) | (0xc268 >> 2),
235 (0x0e00 << 16) | (0xc26c >> 2),
237 (0x0e00 << 16) | (0xc270 >> 2),
239 (0x0e00 << 16) | (0xc274 >> 2),
241 (0x0e00 << 16) | (0xc278 >> 2),
243 (0x0e00 << 16) | (0xc27c >> 2),
245 (0x0e00 << 16) | (0xc280 >> 2),
247 (0x0e00 << 16) | (0xc284 >> 2),
249 (0x0e00 << 16) | (0xc288 >> 2),
251 (0x0e00 << 16) | (0xc28c >> 2),
253 (0x0e00 << 16) | (0xc290 >> 2),
255 (0x0e00 << 16) | (0xc294 >> 2),
257 (0x0e00 << 16) | (0xc298 >> 2),
259 (0x0e00 << 16) | (0xc29c >> 2),
261 (0x0e00 << 16) | (0xc2a0 >> 2),
263 (0x0e00 << 16) | (0xc2a4 >> 2),
265 (0x0e00 << 16) | (0xc2a8 >> 2),
267 (0x0e00 << 16) | (0xc2ac >> 2),
269 (0x0e00 << 16) | (0xc2b0 >> 2),
271 (0x0e00 << 16) | (0x301d0 >> 2),
273 (0x0e00 << 16) | (0x30238 >> 2),
275 (0x0e00 << 16) | (0x30250 >> 2),
277 (0x0e00 << 16) | (0x30254 >> 2),
279 (0x0e00 << 16) | (0x30258 >> 2),
281 (0x0e00 << 16) | (0x3025c >> 2),
283 (0x4e00 << 16) | (0xc900 >> 2),
285 (0x5e00 << 16) | (0xc900 >> 2),
287 (0x6e00 << 16) | (0xc900 >> 2),
289 (0x7e00 << 16) | (0xc900 >> 2),
291 (0x8e00 << 16) | (0xc900 >> 2),
293 (0x9e00 << 16) | (0xc900 >> 2),
295 (0xae00 << 16) | (0xc900 >> 2),
297 (0xbe00 << 16) | (0xc900 >> 2),
299 (0x4e00 << 16) | (0xc904 >> 2),
301 (0x5e00 << 16) | (0xc904 >> 2),
303 (0x6e00 << 16) | (0xc904 >> 2),
305 (0x7e00 << 16) | (0xc904 >> 2),
307 (0x8e00 << 16) | (0xc904 >> 2),
309 (0x9e00 << 16) | (0xc904 >> 2),
311 (0xae00 << 16) | (0xc904 >> 2),
313 (0xbe00 << 16) | (0xc904 >> 2),
315 (0x4e00 << 16) | (0xc908 >> 2),
317 (0x5e00 << 16) | (0xc908 >> 2),
319 (0x6e00 << 16) | (0xc908 >> 2),
321 (0x7e00 << 16) | (0xc908 >> 2),
323 (0x8e00 << 16) | (0xc908 >> 2),
325 (0x9e00 << 16) | (0xc908 >> 2),
327 (0xae00 << 16) | (0xc908 >> 2),
329 (0xbe00 << 16) | (0xc908 >> 2),
331 (0x4e00 << 16) | (0xc90c >> 2),
333 (0x5e00 << 16) | (0xc90c >> 2),
335 (0x6e00 << 16) | (0xc90c >> 2),
337 (0x7e00 << 16) | (0xc90c >> 2),
339 (0x8e00 << 16) | (0xc90c >> 2),
341 (0x9e00 << 16) | (0xc90c >> 2),
343 (0xae00 << 16) | (0xc90c >> 2),
345 (0xbe00 << 16) | (0xc90c >> 2),
347 (0x4e00 << 16) | (0xc910 >> 2),
349 (0x5e00 << 16) | (0xc910 >> 2),
351 (0x6e00 << 16) | (0xc910 >> 2),
353 (0x7e00 << 16) | (0xc910 >> 2),
355 (0x8e00 << 16) | (0xc910 >> 2),
357 (0x9e00 << 16) | (0xc910 >> 2),
359 (0xae00 << 16) | (0xc910 >> 2),
361 (0xbe00 << 16) | (0xc910 >> 2),
363 (0x0e00 << 16) | (0xc99c >> 2),
365 (0x0e00 << 16) | (0x9834 >> 2),
367 (0x0000 << 16) | (0x30f00 >> 2),
369 (0x0001 << 16) | (0x30f00 >> 2),
371 (0x0000 << 16) | (0x30f04 >> 2),
373 (0x0001 << 16) | (0x30f04 >> 2),
375 (0x0000 << 16) | (0x30f08 >> 2),
377 (0x0001 << 16) | (0x30f08 >> 2),
379 (0x0000 << 16) | (0x30f0c >> 2),
381 (0x0001 << 16) | (0x30f0c >> 2),
383 (0x0600 << 16) | (0x9b7c >> 2),
385 (0x0e00 << 16) | (0x8a14 >> 2),
387 (0x0e00 << 16) | (0x8a18 >> 2),
389 (0x0600 << 16) | (0x30a00 >> 2),
391 (0x0e00 << 16) | (0x8bf0 >> 2),
393 (0x0e00 << 16) | (0x8bcc >> 2),
395 (0x0e00 << 16) | (0x8b24 >> 2),
397 (0x0e00 << 16) | (0x30a04 >> 2),
399 (0x0600 << 16) | (0x30a10 >> 2),
401 (0x0600 << 16) | (0x30a14 >> 2),
403 (0x0600 << 16) | (0x30a18 >> 2),
405 (0x0600 << 16) | (0x30a2c >> 2),
407 (0x0e00 << 16) | (0xc700 >> 2),
409 (0x0e00 << 16) | (0xc704 >> 2),
411 (0x0e00 << 16) | (0xc708 >> 2),
413 (0x0e00 << 16) | (0xc768 >> 2),
415 (0x0400 << 16) | (0xc770 >> 2),
417 (0x0400 << 16) | (0xc774 >> 2),
419 (0x0400 << 16) | (0xc778 >> 2),
421 (0x0400 << 16) | (0xc77c >> 2),
423 (0x0400 << 16) | (0xc780 >> 2),
425 (0x0400 << 16) | (0xc784 >> 2),
427 (0x0400 << 16) | (0xc788 >> 2),
429 (0x0400 << 16) | (0xc78c >> 2),
431 (0x0400 << 16) | (0xc798 >> 2),
433 (0x0400 << 16) | (0xc79c >> 2),
435 (0x0400 << 16) | (0xc7a0 >> 2),
437 (0x0400 << 16) | (0xc7a4 >> 2),
439 (0x0400 << 16) | (0xc7a8 >> 2),
441 (0x0400 << 16) | (0xc7ac >> 2),
443 (0x0400 << 16) | (0xc7b0 >> 2),
445 (0x0400 << 16) | (0xc7b4 >> 2),
447 (0x0e00 << 16) | (0x9100 >> 2),
449 (0x0e00 << 16) | (0x3c010 >> 2),
451 (0x0e00 << 16) | (0x92a8 >> 2),
453 (0x0e00 << 16) | (0x92ac >> 2),
455 (0x0e00 << 16) | (0x92b4 >> 2),
457 (0x0e00 << 16) | (0x92b8 >> 2),
459 (0x0e00 << 16) | (0x92bc >> 2),
461 (0x0e00 << 16) | (0x92c0 >> 2),
463 (0x0e00 << 16) | (0x92c4 >> 2),
465 (0x0e00 << 16) | (0x92c8 >> 2),
467 (0x0e00 << 16) | (0x92cc >> 2),
469 (0x0e00 << 16) | (0x92d0 >> 2),
471 (0x0e00 << 16) | (0x8c00 >> 2),
473 (0x0e00 << 16) | (0x8c04 >> 2),
475 (0x0e00 << 16) | (0x8c20 >> 2),
477 (0x0e00 << 16) | (0x8c38 >> 2),
479 (0x0e00 << 16) | (0x8c3c >> 2),
481 (0x0e00 << 16) | (0xae00 >> 2),
483 (0x0e00 << 16) | (0x9604 >> 2),
485 (0x0e00 << 16) | (0xac08 >> 2),
487 (0x0e00 << 16) | (0xac0c >> 2),
489 (0x0e00 << 16) | (0xac10 >> 2),
491 (0x0e00 << 16) | (0xac14 >> 2),
493 (0x0e00 << 16) | (0xac58 >> 2),
495 (0x0e00 << 16) | (0xac68 >> 2),
497 (0x0e00 << 16) | (0xac6c >> 2),
499 (0x0e00 << 16) | (0xac70 >> 2),
501 (0x0e00 << 16) | (0xac74 >> 2),
503 (0x0e00 << 16) | (0xac78 >> 2),
505 (0x0e00 << 16) | (0xac7c >> 2),
507 (0x0e00 << 16) | (0xac80 >> 2),
509 (0x0e00 << 16) | (0xac84 >> 2),
511 (0x0e00 << 16) | (0xac88 >> 2),
513 (0x0e00 << 16) | (0xac8c >> 2),
515 (0x0e00 << 16) | (0x970c >> 2),
517 (0x0e00 << 16) | (0x9714 >> 2),
519 (0x0e00 << 16) | (0x9718 >> 2),
521 (0x0e00 << 16) | (0x971c >> 2),
523 (0x0e00 << 16) | (0x31068 >> 2),
525 (0x4e00 << 16) | (0x31068 >> 2),
527 (0x5e00 << 16) | (0x31068 >> 2),
529 (0x6e00 << 16) | (0x31068 >> 2),
531 (0x7e00 << 16) | (0x31068 >> 2),
533 (0x8e00 << 16) | (0x31068 >> 2),
535 (0x9e00 << 16) | (0x31068 >> 2),
537 (0xae00 << 16) | (0x31068 >> 2),
539 (0xbe00 << 16) | (0x31068 >> 2),
541 (0x0e00 << 16) | (0xcd10 >> 2),
543 (0x0e00 << 16) | (0xcd14 >> 2),
545 (0x0e00 << 16) | (0x88b0 >> 2),
547 (0x0e00 << 16) | (0x88b4 >> 2),
549 (0x0e00 << 16) | (0x88b8 >> 2),
551 (0x0e00 << 16) | (0x88bc >> 2),
553 (0x0400 << 16) | (0x89c0 >> 2),
555 (0x0e00 << 16) | (0x88c4 >> 2),
557 (0x0e00 << 16) | (0x88c8 >> 2),
559 (0x0e00 << 16) | (0x88d0 >> 2),
561 (0x0e00 << 16) | (0x88d4 >> 2),
563 (0x0e00 << 16) | (0x88d8 >> 2),
565 (0x0e00 << 16) | (0x8980 >> 2),
567 (0x0e00 << 16) | (0x30938 >> 2),
569 (0x0e00 << 16) | (0x3093c >> 2),
571 (0x0e00 << 16) | (0x30940 >> 2),
573 (0x0e00 << 16) | (0x89a0 >> 2),
575 (0x0e00 << 16) | (0x30900 >> 2),
577 (0x0e00 << 16) | (0x30904 >> 2),
579 (0x0e00 << 16) | (0x89b4 >> 2),
581 (0x0e00 << 16) | (0x3c210 >> 2),
583 (0x0e00 << 16) | (0x3c214 >> 2),
585 (0x0e00 << 16) | (0x3c218 >> 2),
587 (0x0e00 << 16) | (0x8904 >> 2),
590 (0x0e00 << 16) | (0x8c28 >> 2),
591 (0x0e00 << 16) | (0x8c2c >> 2),
592 (0x0e00 << 16) | (0x8c30 >> 2),
593 (0x0e00 << 16) | (0x8c34 >> 2),
594 (0x0e00 << 16) | (0x9600 >> 2),
/* RLC save/restore register list for Kalindi (Kabini GFX). Same encoding as
 * the Spectre list above, with fewer shader-engine instances (selectors run
 * 0x4e00..0x7e00 rather than up to 0xbe00). NOTE(review): truncated extract —
 * the initializer's '{', '};' and about half the entries are missing; restore
 * from the full file rather than hand-editing. */
597 static const u32 kalindi_rlc_save_restore_register_list[] =
599 (0x0e00 << 16) | (0xc12c >> 2),
601 (0x0e00 << 16) | (0xc140 >> 2),
603 (0x0e00 << 16) | (0xc150 >> 2),
605 (0x0e00 << 16) | (0xc15c >> 2),
607 (0x0e00 << 16) | (0xc168 >> 2),
609 (0x0e00 << 16) | (0xc170 >> 2),
611 (0x0e00 << 16) | (0xc204 >> 2),
613 (0x0e00 << 16) | (0xc2b4 >> 2),
615 (0x0e00 << 16) | (0xc2b8 >> 2),
617 (0x0e00 << 16) | (0xc2bc >> 2),
619 (0x0e00 << 16) | (0xc2c0 >> 2),
621 (0x0e00 << 16) | (0x8228 >> 2),
623 (0x0e00 << 16) | (0x829c >> 2),
625 (0x0e00 << 16) | (0x869c >> 2),
627 (0x0600 << 16) | (0x98f4 >> 2),
629 (0x0e00 << 16) | (0x98f8 >> 2),
631 (0x0e00 << 16) | (0x9900 >> 2),
633 (0x0e00 << 16) | (0xc260 >> 2),
635 (0x0e00 << 16) | (0x90e8 >> 2),
637 (0x0e00 << 16) | (0x3c000 >> 2),
639 (0x0e00 << 16) | (0x3c00c >> 2),
641 (0x0e00 << 16) | (0x8c1c >> 2),
643 (0x0e00 << 16) | (0x9700 >> 2),
645 (0x0e00 << 16) | (0xcd20 >> 2),
647 (0x4e00 << 16) | (0xcd20 >> 2),
649 (0x5e00 << 16) | (0xcd20 >> 2),
651 (0x6e00 << 16) | (0xcd20 >> 2),
653 (0x7e00 << 16) | (0xcd20 >> 2),
655 (0x0e00 << 16) | (0x89bc >> 2),
657 (0x0e00 << 16) | (0x8900 >> 2),
660 (0x0e00 << 16) | (0xc130 >> 2),
662 (0x0e00 << 16) | (0xc134 >> 2),
664 (0x0e00 << 16) | (0xc1fc >> 2),
666 (0x0e00 << 16) | (0xc208 >> 2),
668 (0x0e00 << 16) | (0xc264 >> 2),
670 (0x0e00 << 16) | (0xc268 >> 2),
672 (0x0e00 << 16) | (0xc26c >> 2),
674 (0x0e00 << 16) | (0xc270 >> 2),
676 (0x0e00 << 16) | (0xc274 >> 2),
678 (0x0e00 << 16) | (0xc28c >> 2),
680 (0x0e00 << 16) | (0xc290 >> 2),
682 (0x0e00 << 16) | (0xc294 >> 2),
684 (0x0e00 << 16) | (0xc298 >> 2),
686 (0x0e00 << 16) | (0xc2a0 >> 2),
688 (0x0e00 << 16) | (0xc2a4 >> 2),
690 (0x0e00 << 16) | (0xc2a8 >> 2),
692 (0x0e00 << 16) | (0xc2ac >> 2),
694 (0x0e00 << 16) | (0x301d0 >> 2),
696 (0x0e00 << 16) | (0x30238 >> 2),
698 (0x0e00 << 16) | (0x30250 >> 2),
700 (0x0e00 << 16) | (0x30254 >> 2),
702 (0x0e00 << 16) | (0x30258 >> 2),
704 (0x0e00 << 16) | (0x3025c >> 2),
706 (0x4e00 << 16) | (0xc900 >> 2),
708 (0x5e00 << 16) | (0xc900 >> 2),
710 (0x6e00 << 16) | (0xc900 >> 2),
712 (0x7e00 << 16) | (0xc900 >> 2),
714 (0x4e00 << 16) | (0xc904 >> 2),
716 (0x5e00 << 16) | (0xc904 >> 2),
718 (0x6e00 << 16) | (0xc904 >> 2),
720 (0x7e00 << 16) | (0xc904 >> 2),
722 (0x4e00 << 16) | (0xc908 >> 2),
724 (0x5e00 << 16) | (0xc908 >> 2),
726 (0x6e00 << 16) | (0xc908 >> 2),
728 (0x7e00 << 16) | (0xc908 >> 2),
730 (0x4e00 << 16) | (0xc90c >> 2),
732 (0x5e00 << 16) | (0xc90c >> 2),
734 (0x6e00 << 16) | (0xc90c >> 2),
736 (0x7e00 << 16) | (0xc90c >> 2),
738 (0x4e00 << 16) | (0xc910 >> 2),
740 (0x5e00 << 16) | (0xc910 >> 2),
742 (0x6e00 << 16) | (0xc910 >> 2),
744 (0x7e00 << 16) | (0xc910 >> 2),
746 (0x0e00 << 16) | (0xc99c >> 2),
748 (0x0e00 << 16) | (0x9834 >> 2),
750 (0x0000 << 16) | (0x30f00 >> 2),
752 (0x0000 << 16) | (0x30f04 >> 2),
754 (0x0000 << 16) | (0x30f08 >> 2),
756 (0x0000 << 16) | (0x30f0c >> 2),
758 (0x0600 << 16) | (0x9b7c >> 2),
760 (0x0e00 << 16) | (0x8a14 >> 2),
762 (0x0e00 << 16) | (0x8a18 >> 2),
764 (0x0600 << 16) | (0x30a00 >> 2),
766 (0x0e00 << 16) | (0x8bf0 >> 2),
768 (0x0e00 << 16) | (0x8bcc >> 2),
770 (0x0e00 << 16) | (0x8b24 >> 2),
772 (0x0e00 << 16) | (0x30a04 >> 2),
774 (0x0600 << 16) | (0x30a10 >> 2),
776 (0x0600 << 16) | (0x30a14 >> 2),
778 (0x0600 << 16) | (0x30a18 >> 2),
780 (0x0600 << 16) | (0x30a2c >> 2),
782 (0x0e00 << 16) | (0xc700 >> 2),
784 (0x0e00 << 16) | (0xc704 >> 2),
786 (0x0e00 << 16) | (0xc708 >> 2),
788 (0x0e00 << 16) | (0xc768 >> 2),
790 (0x0400 << 16) | (0xc770 >> 2),
792 (0x0400 << 16) | (0xc774 >> 2),
794 (0x0400 << 16) | (0xc798 >> 2),
796 (0x0400 << 16) | (0xc79c >> 2),
798 (0x0e00 << 16) | (0x9100 >> 2),
800 (0x0e00 << 16) | (0x3c010 >> 2),
802 (0x0e00 << 16) | (0x8c00 >> 2),
804 (0x0e00 << 16) | (0x8c04 >> 2),
806 (0x0e00 << 16) | (0x8c20 >> 2),
808 (0x0e00 << 16) | (0x8c38 >> 2),
810 (0x0e00 << 16) | (0x8c3c >> 2),
812 (0x0e00 << 16) | (0xae00 >> 2),
814 (0x0e00 << 16) | (0x9604 >> 2),
816 (0x0e00 << 16) | (0xac08 >> 2),
818 (0x0e00 << 16) | (0xac0c >> 2),
820 (0x0e00 << 16) | (0xac10 >> 2),
822 (0x0e00 << 16) | (0xac14 >> 2),
824 (0x0e00 << 16) | (0xac58 >> 2),
826 (0x0e00 << 16) | (0xac68 >> 2),
828 (0x0e00 << 16) | (0xac6c >> 2),
830 (0x0e00 << 16) | (0xac70 >> 2),
832 (0x0e00 << 16) | (0xac74 >> 2),
834 (0x0e00 << 16) | (0xac78 >> 2),
836 (0x0e00 << 16) | (0xac7c >> 2),
838 (0x0e00 << 16) | (0xac80 >> 2),
840 (0x0e00 << 16) | (0xac84 >> 2),
842 (0x0e00 << 16) | (0xac88 >> 2),
844 (0x0e00 << 16) | (0xac8c >> 2),
846 (0x0e00 << 16) | (0x970c >> 2),
848 (0x0e00 << 16) | (0x9714 >> 2),
850 (0x0e00 << 16) | (0x9718 >> 2),
852 (0x0e00 << 16) | (0x971c >> 2),
854 (0x0e00 << 16) | (0x31068 >> 2),
856 (0x4e00 << 16) | (0x31068 >> 2),
858 (0x5e00 << 16) | (0x31068 >> 2),
860 (0x6e00 << 16) | (0x31068 >> 2),
862 (0x7e00 << 16) | (0x31068 >> 2),
864 (0x0e00 << 16) | (0xcd10 >> 2),
866 (0x0e00 << 16) | (0xcd14 >> 2),
868 (0x0e00 << 16) | (0x88b0 >> 2),
870 (0x0e00 << 16) | (0x88b4 >> 2),
872 (0x0e00 << 16) | (0x88b8 >> 2),
874 (0x0e00 << 16) | (0x88bc >> 2),
876 (0x0400 << 16) | (0x89c0 >> 2),
878 (0x0e00 << 16) | (0x88c4 >> 2),
880 (0x0e00 << 16) | (0x88c8 >> 2),
882 (0x0e00 << 16) | (0x88d0 >> 2),
884 (0x0e00 << 16) | (0x88d4 >> 2),
886 (0x0e00 << 16) | (0x88d8 >> 2),
888 (0x0e00 << 16) | (0x8980 >> 2),
890 (0x0e00 << 16) | (0x30938 >> 2),
892 (0x0e00 << 16) | (0x3093c >> 2),
894 (0x0e00 << 16) | (0x30940 >> 2),
896 (0x0e00 << 16) | (0x89a0 >> 2),
898 (0x0e00 << 16) | (0x30900 >> 2),
900 (0x0e00 << 16) | (0x30904 >> 2),
902 (0x0e00 << 16) | (0x89b4 >> 2),
904 (0x0e00 << 16) | (0x3e1fc >> 2),
906 (0x0e00 << 16) | (0x3c210 >> 2),
908 (0x0e00 << 16) | (0x3c214 >> 2),
910 (0x0e00 << 16) | (0x3c218 >> 2),
912 (0x0e00 << 16) | (0x8904 >> 2),
915 (0x0e00 << 16) | (0x8c28 >> 2),
916 (0x0e00 << 16) | (0x8c2c >> 2),
917 (0x0e00 << 16) | (0x8c30 >> 2),
918 (0x0e00 << 16) | (0x8c34 >> 2),
919 (0x0e00 << 16) | (0x9600 >> 2),
/* Bonaire "golden" register tables, consumed by cik_init_golden_registers()
 * via radeon_program_register_sequence(). Each row looks like a
 * (register offset, and-mask, value) triple — TODO confirm against
 * radeon_program_register_sequence(). NOTE(review): truncated extract — the
 * initializers' '{'/'};' delimiters are missing throughout this section. */
922 static const u32 bonaire_golden_spm_registers[] =
924 0x30800, 0xe0ffffff, 0xe0000000
927 static const u32 bonaire_golden_common_registers[] =
929 0xc770, 0xffffffff, 0x00000800,
930 0xc774, 0xffffffff, 0x00000800,
931 0xc798, 0xffffffff, 0x00007fbf,
932 0xc79c, 0xffffffff, 0x00007faf
935 static const u32 bonaire_golden_registers[] =
937 0x3354, 0x00000333, 0x00000333,
938 0x3350, 0x000c0fc0, 0x00040200,
939 0x9a10, 0x00010000, 0x00058208,
940 0x3c000, 0xffff1fff, 0x00140000,
941 0x3c200, 0xfdfc0fff, 0x00000100,
942 0x3c234, 0x40000000, 0x40000200,
943 0x9830, 0xffffffff, 0x00000000,
944 0x9834, 0xf00fffff, 0x00000400,
945 0x9838, 0x0002021c, 0x00020200,
946 0xc78, 0x00000080, 0x00000000,
947 0x5bb0, 0x000000f0, 0x00000070,
948 0x5bc0, 0xf0311fff, 0x80300000,
949 0x98f8, 0x73773777, 0x12010001,
950 0x350c, 0x00810000, 0x408af000,
951 0x7030, 0x31000111, 0x00000011,
952 0x2f48, 0x73773777, 0x12010001,
953 0x220c, 0x00007fb6, 0x0021a1b1,
954 0x2210, 0x00007fb6, 0x002021b1,
955 0x2180, 0x00007fb6, 0x00002191,
956 0x2218, 0x00007fb6, 0x002121b1,
957 0x221c, 0x00007fb6, 0x002021b1,
958 0x21dc, 0x00007fb6, 0x00002191,
959 0x21e0, 0x00007fb6, 0x00002191,
960 0x3628, 0x0000003f, 0x0000000a,
961 0x362c, 0x0000003f, 0x0000000a,
962 0x2ae4, 0x00073ffe, 0x000022a2,
963 0x240c, 0x000007ff, 0x00000000,
964 0x8a14, 0xf000003f, 0x00000007,
965 0x8bf0, 0x00002001, 0x00000001,
966 0x8b24, 0xffffffff, 0x00ffffff,
967 0x30a04, 0x0000ff0f, 0x00000000,
968 0x28a4c, 0x07ffffff, 0x06000000,
969 0x4d8, 0x00000fff, 0x00000100,
970 0x3e78, 0x00000001, 0x00000002,
971 0x9100, 0x03000000, 0x0362c688,
972 0x8c00, 0x000000ff, 0x00000001,
973 0xe40, 0x00001fff, 0x00001fff,
974 0x9060, 0x0000007f, 0x00000020,
975 0x9508, 0x00010000, 0x00010000,
976 0xac14, 0x000003ff, 0x000000f3,
977 0xac0c, 0xffffffff, 0x00001032
/* Medium-grain / coarse-grain clockgating init sequence for Bonaire. */
980 static const u32 bonaire_mgcg_cgcg_init[] =
982 0xc420, 0xffffffff, 0xfffffffc,
983 0x30800, 0xffffffff, 0xe0000000,
984 0x3c2a0, 0xffffffff, 0x00000100,
985 0x3c208, 0xffffffff, 0x00000100,
986 0x3c2c0, 0xffffffff, 0xc0000100,
987 0x3c2c8, 0xffffffff, 0xc0000100,
988 0x3c2c4, 0xffffffff, 0xc0000100,
989 0x55e4, 0xffffffff, 0x00600100,
990 0x3c280, 0xffffffff, 0x00000100,
991 0x3c214, 0xffffffff, 0x06000100,
992 0x3c220, 0xffffffff, 0x00000100,
993 0x3c218, 0xffffffff, 0x06000100,
994 0x3c204, 0xffffffff, 0x00000100,
995 0x3c2e0, 0xffffffff, 0x00000100,
996 0x3c224, 0xffffffff, 0x00000100,
997 0x3c200, 0xffffffff, 0x00000100,
998 0x3c230, 0xffffffff, 0x00000100,
999 0x3c234, 0xffffffff, 0x00000100,
1000 0x3c250, 0xffffffff, 0x00000100,
1001 0x3c254, 0xffffffff, 0x00000100,
1002 0x3c258, 0xffffffff, 0x00000100,
1003 0x3c25c, 0xffffffff, 0x00000100,
1004 0x3c260, 0xffffffff, 0x00000100,
1005 0x3c27c, 0xffffffff, 0x00000100,
1006 0x3c278, 0xffffffff, 0x00000100,
1007 0x3c210, 0xffffffff, 0x06000100,
1008 0x3c290, 0xffffffff, 0x00000100,
1009 0x3c274, 0xffffffff, 0x00000100,
1010 0x3c2b4, 0xffffffff, 0x00000100,
1011 0x3c2b0, 0xffffffff, 0x00000100,
1012 0x3c270, 0xffffffff, 0x00000100,
1013 0x30800, 0xffffffff, 0xe0000000,
1014 0x3c020, 0xffffffff, 0x00010000,
1015 0x3c024, 0xffffffff, 0x00030002,
1016 0x3c028, 0xffffffff, 0x00040007,
1017 0x3c02c, 0xffffffff, 0x00060005,
1018 0x3c030, 0xffffffff, 0x00090008,
1019 0x3c034, 0xffffffff, 0x00010000,
1020 0x3c038, 0xffffffff, 0x00030002,
1021 0x3c03c, 0xffffffff, 0x00040007,
1022 0x3c040, 0xffffffff, 0x00060005,
1023 0x3c044, 0xffffffff, 0x00090008,
1024 0x3c048, 0xffffffff, 0x00010000,
1025 0x3c04c, 0xffffffff, 0x00030002,
1026 0x3c050, 0xffffffff, 0x00040007,
1027 0x3c054, 0xffffffff, 0x00060005,
1028 0x3c058, 0xffffffff, 0x00090008,
1029 0x3c05c, 0xffffffff, 0x00010000,
1030 0x3c060, 0xffffffff, 0x00030002,
1031 0x3c064, 0xffffffff, 0x00040007,
1032 0x3c068, 0xffffffff, 0x00060005,
1033 0x3c06c, 0xffffffff, 0x00090008,
1034 0x3c070, 0xffffffff, 0x00010000,
1035 0x3c074, 0xffffffff, 0x00030002,
1036 0x3c078, 0xffffffff, 0x00040007,
1037 0x3c07c, 0xffffffff, 0x00060005,
1038 0x3c080, 0xffffffff, 0x00090008,
1039 0x3c084, 0xffffffff, 0x00010000,
1040 0x3c088, 0xffffffff, 0x00030002,
1041 0x3c08c, 0xffffffff, 0x00040007,
1042 0x3c090, 0xffffffff, 0x00060005,
1043 0x3c094, 0xffffffff, 0x00090008,
1044 0x3c098, 0xffffffff, 0x00010000,
1045 0x3c09c, 0xffffffff, 0x00030002,
1046 0x3c0a0, 0xffffffff, 0x00040007,
1047 0x3c0a4, 0xffffffff, 0x00060005,
1048 0x3c0a8, 0xffffffff, 0x00090008,
1049 0x3c000, 0xffffffff, 0x96e00200,
1050 0x8708, 0xffffffff, 0x00900100,
1051 0xc424, 0xffffffff, 0x0020003f,
1052 0x38, 0xffffffff, 0x0140001c,
1053 0x3c, 0x000f0000, 0x000f0000,
1054 0x220, 0xffffffff, 0xC060000C,
1055 0x224, 0xc0000fff, 0x00000100,
1056 0xf90, 0xffffffff, 0x00000100,
1057 0xf98, 0x00000101, 0x00000000,
1058 0x20a8, 0xffffffff, 0x00000104,
1059 0x55e4, 0xff000fff, 0x00000100,
1060 0x30cc, 0xc0000fff, 0x00000104,
1061 0xc1e4, 0x00000001, 0x00000001,
1062 0xd00c, 0xff000ff0, 0x00000100,
1063 0xd80c, 0xff000ff0, 0x00000100
/* Spectre (Kaveri) "golden" register tables; same row layout and usage as
 * the Bonaire tables. NOTE(review): truncated extract — initializer braces
 * are missing in this section. */
1068 static const u32 spectre_golden_spm_registers[] =
1068 0x30800, 0xe0ffffff, 0xe0000000
1071 static const u32 spectre_golden_common_registers[] =
1073 0xc770, 0xffffffff, 0x00000800,
1074 0xc774, 0xffffffff, 0x00000800,
1075 0xc798, 0xffffffff, 0x00007fbf,
1076 0xc79c, 0xffffffff, 0x00007faf
1079 static const u32 spectre_golden_registers[] =
1081 0x3c000, 0xffff1fff, 0x96940200,
1082 0x3c00c, 0xffff0001, 0xff000000,
1083 0x3c200, 0xfffc0fff, 0x00000100,
1084 0x6ed8, 0x00010101, 0x00010000,
1085 0x9834, 0xf00fffff, 0x00000400,
1086 0x9838, 0xfffffffc, 0x00020200,
1087 0x5bb0, 0x000000f0, 0x00000070,
1088 0x5bc0, 0xf0311fff, 0x80300000,
1089 0x98f8, 0x73773777, 0x12010001,
1090 0x9b7c, 0x00ff0000, 0x00fc0000,
1091 0x2f48, 0x73773777, 0x12010001,
1092 0x8a14, 0xf000003f, 0x00000007,
1093 0x8b24, 0xffffffff, 0x00ffffff,
1094 0x28350, 0x3f3f3fff, 0x00000082,
1095 0x28355, 0x0000003f, 0x00000000,
1096 0x3e78, 0x00000001, 0x00000002,
1097 0x913c, 0xffff03df, 0x00000004,
1098 0xc768, 0x00000008, 0x00000008,
1099 0x8c00, 0x000008ff, 0x00000800,
1100 0x9508, 0x00010000, 0x00010000,
1101 0xac0c, 0xffffffff, 0x54763210,
1102 0x214f8, 0x01ff01ff, 0x00000002,
1103 0x21498, 0x007ff800, 0x00200000,
1104 0x2015c, 0xffffffff, 0x00000f40,
1105 0x30934, 0xffffffff, 0x00000001
/* Medium-grain / coarse-grain clockgating init sequence for Spectre. */
1108 static const u32 spectre_mgcg_cgcg_init[] =
1110 0xc420, 0xffffffff, 0xfffffffc,
1111 0x30800, 0xffffffff, 0xe0000000,
1112 0x3c2a0, 0xffffffff, 0x00000100,
1113 0x3c208, 0xffffffff, 0x00000100,
1114 0x3c2c0, 0xffffffff, 0x00000100,
1115 0x3c2c8, 0xffffffff, 0x00000100,
1116 0x3c2c4, 0xffffffff, 0x00000100,
1117 0x55e4, 0xffffffff, 0x00600100,
1118 0x3c280, 0xffffffff, 0x00000100,
1119 0x3c214, 0xffffffff, 0x06000100,
1120 0x3c220, 0xffffffff, 0x00000100,
1121 0x3c218, 0xffffffff, 0x06000100,
1122 0x3c204, 0xffffffff, 0x00000100,
1123 0x3c2e0, 0xffffffff, 0x00000100,
1124 0x3c224, 0xffffffff, 0x00000100,
1125 0x3c200, 0xffffffff, 0x00000100,
1126 0x3c230, 0xffffffff, 0x00000100,
1127 0x3c234, 0xffffffff, 0x00000100,
1128 0x3c250, 0xffffffff, 0x00000100,
1129 0x3c254, 0xffffffff, 0x00000100,
1130 0x3c258, 0xffffffff, 0x00000100,
1131 0x3c25c, 0xffffffff, 0x00000100,
1132 0x3c260, 0xffffffff, 0x00000100,
1133 0x3c27c, 0xffffffff, 0x00000100,
1134 0x3c278, 0xffffffff, 0x00000100,
1135 0x3c210, 0xffffffff, 0x06000100,
1136 0x3c290, 0xffffffff, 0x00000100,
1137 0x3c274, 0xffffffff, 0x00000100,
1138 0x3c2b4, 0xffffffff, 0x00000100,
1139 0x3c2b0, 0xffffffff, 0x00000100,
1140 0x3c270, 0xffffffff, 0x00000100,
1141 0x30800, 0xffffffff, 0xe0000000,
1142 0x3c020, 0xffffffff, 0x00010000,
1143 0x3c024, 0xffffffff, 0x00030002,
1144 0x3c028, 0xffffffff, 0x00040007,
1145 0x3c02c, 0xffffffff, 0x00060005,
1146 0x3c030, 0xffffffff, 0x00090008,
1147 0x3c034, 0xffffffff, 0x00010000,
1148 0x3c038, 0xffffffff, 0x00030002,
1149 0x3c03c, 0xffffffff, 0x00040007,
1150 0x3c040, 0xffffffff, 0x00060005,
1151 0x3c044, 0xffffffff, 0x00090008,
1152 0x3c048, 0xffffffff, 0x00010000,
1153 0x3c04c, 0xffffffff, 0x00030002,
1154 0x3c050, 0xffffffff, 0x00040007,
1155 0x3c054, 0xffffffff, 0x00060005,
1156 0x3c058, 0xffffffff, 0x00090008,
1157 0x3c05c, 0xffffffff, 0x00010000,
1158 0x3c060, 0xffffffff, 0x00030002,
1159 0x3c064, 0xffffffff, 0x00040007,
1160 0x3c068, 0xffffffff, 0x00060005,
1161 0x3c06c, 0xffffffff, 0x00090008,
1162 0x3c070, 0xffffffff, 0x00010000,
1163 0x3c074, 0xffffffff, 0x00030002,
1164 0x3c078, 0xffffffff, 0x00040007,
1165 0x3c07c, 0xffffffff, 0x00060005,
1166 0x3c080, 0xffffffff, 0x00090008,
1167 0x3c084, 0xffffffff, 0x00010000,
1168 0x3c088, 0xffffffff, 0x00030002,
1169 0x3c08c, 0xffffffff, 0x00040007,
1170 0x3c090, 0xffffffff, 0x00060005,
1171 0x3c094, 0xffffffff, 0x00090008,
1172 0x3c098, 0xffffffff, 0x00010000,
1173 0x3c09c, 0xffffffff, 0x00030002,
1174 0x3c0a0, 0xffffffff, 0x00040007,
1175 0x3c0a4, 0xffffffff, 0x00060005,
1176 0x3c0a8, 0xffffffff, 0x00090008,
1177 0x3c0ac, 0xffffffff, 0x00010000,
1178 0x3c0b0, 0xffffffff, 0x00030002,
1179 0x3c0b4, 0xffffffff, 0x00040007,
1180 0x3c0b8, 0xffffffff, 0x00060005,
1181 0x3c0bc, 0xffffffff, 0x00090008,
1182 0x3c000, 0xffffffff, 0x96e00200,
1183 0x8708, 0xffffffff, 0x00900100,
1184 0xc424, 0xffffffff, 0x0020003f,
1185 0x38, 0xffffffff, 0x0140001c,
1186 0x3c, 0x000f0000, 0x000f0000,
1187 0x220, 0xffffffff, 0xC060000C,
1188 0x224, 0xc0000fff, 0x00000100,
1189 0xf90, 0xffffffff, 0x00000100,
1190 0xf98, 0x00000101, 0x00000000,
1191 0x20a8, 0xffffffff, 0x00000104,
1192 0x55e4, 0xff000fff, 0x00000100,
1193 0x30cc, 0xc0000fff, 0x00000104,
1194 0xc1e4, 0x00000001, 0x00000001,
1195 0xd00c, 0xff000ff0, 0x00000100,
1196 0xd80c, 0xff000ff0, 0x00000100
/* Kalindi (Kabini) "golden" register tables; same row layout and usage as
 * the Bonaire/Spectre tables. NOTE(review): truncated extract — initializer
 * braces are missing in this section. */
1199 static const u32 kalindi_golden_spm_registers[] =
1201 0x30800, 0xe0ffffff, 0xe0000000
1204 static const u32 kalindi_golden_common_registers[] =
1206 0xc770, 0xffffffff, 0x00000800,
1207 0xc774, 0xffffffff, 0x00000800,
1208 0xc798, 0xffffffff, 0x00007fbf,
1209 0xc79c, 0xffffffff, 0x00007faf
1212 static const u32 kalindi_golden_registers[] =
1214 0x3c000, 0xffffdfff, 0x6e944040,
1215 0x55e4, 0xff607fff, 0xfc000100,
1216 0x3c220, 0xff000fff, 0x00000100,
1217 0x3c224, 0xff000fff, 0x00000100,
1218 0x3c200, 0xfffc0fff, 0x00000100,
1219 0x6ed8, 0x00010101, 0x00010000,
1220 0x9830, 0xffffffff, 0x00000000,
1221 0x9834, 0xf00fffff, 0x00000400,
1222 0x5bb0, 0x000000f0, 0x00000070,
1223 0x5bc0, 0xf0311fff, 0x80300000,
1224 0x98f8, 0x73773777, 0x12010001,
1225 0x98fc, 0xffffffff, 0x00000010,
1226 0x9b7c, 0x00ff0000, 0x00fc0000,
1227 0x8030, 0x00001f0f, 0x0000100a,
1228 0x2f48, 0x73773777, 0x12010001,
1229 0x2408, 0x000fffff, 0x000c007f,
1230 0x8a14, 0xf000003f, 0x00000007,
1231 0x8b24, 0x3fff3fff, 0x00ffcfff,
1232 0x30a04, 0x0000ff0f, 0x00000000,
1233 0x28a4c, 0x07ffffff, 0x06000000,
1234 0x4d8, 0x00000fff, 0x00000100,
1235 0x3e78, 0x00000001, 0x00000002,
1236 0xc768, 0x00000008, 0x00000008,
1237 0x8c00, 0x000000ff, 0x00000003,
1238 0x214f8, 0x01ff01ff, 0x00000002,
1239 0x21498, 0x007ff800, 0x00200000,
1240 0x2015c, 0xffffffff, 0x00000f40,
1241 0x88c4, 0x001f3ae3, 0x00000082,
1242 0x88d4, 0x0000001f, 0x00000010,
1243 0x30934, 0xffffffff, 0x00000000
/* Medium-grain / coarse-grain clockgating init sequence for Kalindi. */
1246 static const u32 kalindi_mgcg_cgcg_init[] =
1248 0xc420, 0xffffffff, 0xfffffffc,
1249 0x30800, 0xffffffff, 0xe0000000,
1250 0x3c2a0, 0xffffffff, 0x00000100,
1251 0x3c208, 0xffffffff, 0x00000100,
1252 0x3c2c0, 0xffffffff, 0x00000100,
1253 0x3c2c8, 0xffffffff, 0x00000100,
1254 0x3c2c4, 0xffffffff, 0x00000100,
1255 0x55e4, 0xffffffff, 0x00600100,
1256 0x3c280, 0xffffffff, 0x00000100,
1257 0x3c214, 0xffffffff, 0x06000100,
1258 0x3c220, 0xffffffff, 0x00000100,
1259 0x3c218, 0xffffffff, 0x06000100,
1260 0x3c204, 0xffffffff, 0x00000100,
1261 0x3c2e0, 0xffffffff, 0x00000100,
1262 0x3c224, 0xffffffff, 0x00000100,
1263 0x3c200, 0xffffffff, 0x00000100,
1264 0x3c230, 0xffffffff, 0x00000100,
1265 0x3c234, 0xffffffff, 0x00000100,
1266 0x3c250, 0xffffffff, 0x00000100,
1267 0x3c254, 0xffffffff, 0x00000100,
1268 0x3c258, 0xffffffff, 0x00000100,
1269 0x3c25c, 0xffffffff, 0x00000100,
1270 0x3c260, 0xffffffff, 0x00000100,
1271 0x3c27c, 0xffffffff, 0x00000100,
1272 0x3c278, 0xffffffff, 0x00000100,
1273 0x3c210, 0xffffffff, 0x06000100,
1274 0x3c290, 0xffffffff, 0x00000100,
1275 0x3c274, 0xffffffff, 0x00000100,
1276 0x3c2b4, 0xffffffff, 0x00000100,
1277 0x3c2b0, 0xffffffff, 0x00000100,
1278 0x3c270, 0xffffffff, 0x00000100,
1279 0x30800, 0xffffffff, 0xe0000000,
1280 0x3c020, 0xffffffff, 0x00010000,
1281 0x3c024, 0xffffffff, 0x00030002,
1282 0x3c028, 0xffffffff, 0x00040007,
1283 0x3c02c, 0xffffffff, 0x00060005,
1284 0x3c030, 0xffffffff, 0x00090008,
1285 0x3c034, 0xffffffff, 0x00010000,
1286 0x3c038, 0xffffffff, 0x00030002,
1287 0x3c03c, 0xffffffff, 0x00040007,
1288 0x3c040, 0xffffffff, 0x00060005,
1289 0x3c044, 0xffffffff, 0x00090008,
1290 0x3c000, 0xffffffff, 0x96e00200,
1291 0x8708, 0xffffffff, 0x00900100,
1292 0xc424, 0xffffffff, 0x0020003f,
1293 0x38, 0xffffffff, 0x0140001c,
1294 0x3c, 0x000f0000, 0x000f0000,
1295 0x220, 0xffffffff, 0xC060000C,
1296 0x224, 0xc0000fff, 0x00000100,
1297 0x20a8, 0xffffffff, 0x00000104,
1298 0x55e4, 0xff000fff, 0x00000100,
1299 0x30cc, 0xc0000fff, 0x00000104,
1300 0xc1e4, 0x00000001, 0x00000001,
1301 0xd00c, 0xff000ff0, 0x00000100,
1302 0xd80c, 0xff000ff0, 0x00000100
/* cik_init_golden_registers - program the per-ASIC "golden" register
 * sequences (clockgating init, golden, common, SPM) for the detected family.
 * NOTE(review): truncated extract — the switch's case labels, break
 * statements and closing braces are elided; the three groups below are
 * presumably the CHIP_BONAIRE, CHIP_KABINI and CHIP_KAVERI cases (inferred
 * from the table names) — confirm against the full file. */
1305 static void cik_init_golden_registers(struct radeon_device *rdev)
1307 switch (rdev->family) {
1309 radeon_program_register_sequence(rdev,
1310 bonaire_mgcg_cgcg_init,
1311 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1312 radeon_program_register_sequence(rdev,
1313 bonaire_golden_registers,
1314 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1315 radeon_program_register_sequence(rdev,
1316 bonaire_golden_common_registers,
1317 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1318 radeon_program_register_sequence(rdev,
1319 bonaire_golden_spm_registers,
1320 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1323 radeon_program_register_sequence(rdev,
1324 kalindi_mgcg_cgcg_init,
1325 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1326 radeon_program_register_sequence(rdev,
1327 kalindi_golden_registers,
1328 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1329 radeon_program_register_sequence(rdev,
1330 kalindi_golden_common_registers,
1331 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1332 radeon_program_register_sequence(rdev,
1333 kalindi_golden_spm_registers,
1334 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1337 radeon_program_register_sequence(rdev,
1338 spectre_mgcg_cgcg_init,
1339 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1340 radeon_program_register_sequence(rdev,
1341 spectre_golden_registers,
1342 (const u32)ARRAY_SIZE(spectre_golden_registers));
1343 radeon_program_register_sequence(rdev,
1344 spectre_golden_common_registers,
1345 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1346 radeon_program_register_sequence(rdev,
1347 spectre_golden_spm_registers,
1348 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1356 * cik_get_xclk - get the xclk
1358 * @rdev: radeon_device pointer
1360 * Returns the reference clock used by the gfx engine
1363 u32 cik_get_xclk(struct radeon_device *rdev)
1365 u32 reference_clock = rdev->clock.spll.reference_freq;
1367 if (rdev->flags & RADEON_IS_IGP) {
1368 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1369 return reference_clock / 2;
1371 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1372 return reference_clock / 4;
1374 return reference_clock;
1378 * cik_mm_rdoorbell - read a doorbell dword
1380 * @rdev: radeon_device pointer
1381 * @offset: byte offset into the aperture
1383 * Returns the value in the doorbell aperture at the
1384 * requested offset (CIK).
1386 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1388 if (offset < rdev->doorbell.size) {
1389 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1391 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1397 * cik_mm_wdoorbell - write a doorbell dword
1399 * @rdev: radeon_device pointer
1400 * @offset: byte offset into the aperture
1401 * @v: value to write
1403 * Writes @v to the doorbell aperture at the
1404 * requested offset (CIK).
1406 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1408 if (offset < rdev->doorbell.size) {
1409 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1411 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1415 #define BONAIRE_IO_MC_REGS_SIZE 36
1417 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1419 {0x00000070, 0x04400000},
1420 {0x00000071, 0x80c01803},
1421 {0x00000072, 0x00004004},
1422 {0x00000073, 0x00000100},
1423 {0x00000074, 0x00ff0000},
1424 {0x00000075, 0x34000000},
1425 {0x00000076, 0x08000014},
1426 {0x00000077, 0x00cc08ec},
1427 {0x00000078, 0x00000400},
1428 {0x00000079, 0x00000000},
1429 {0x0000007a, 0x04090000},
1430 {0x0000007c, 0x00000000},
1431 {0x0000007e, 0x4408a8e8},
1432 {0x0000007f, 0x00000304},
1433 {0x00000080, 0x00000000},
1434 {0x00000082, 0x00000001},
1435 {0x00000083, 0x00000002},
1436 {0x00000084, 0xf3e4f400},
1437 {0x00000085, 0x052024e3},
1438 {0x00000087, 0x00000000},
1439 {0x00000088, 0x01000000},
1440 {0x0000008a, 0x1c0a0000},
1441 {0x0000008b, 0xff010000},
1442 {0x0000008d, 0xffffefff},
1443 {0x0000008e, 0xfff3efff},
1444 {0x0000008f, 0xfff3efbf},
1445 {0x00000092, 0xf7ffffff},
1446 {0x00000093, 0xffffff7f},
1447 {0x00000095, 0x00101101},
1448 {0x00000096, 0x00000fff},
1449 {0x00000097, 0x00116fff},
1450 {0x00000098, 0x60010000},
1451 {0x00000099, 0x10010000},
1452 {0x0000009a, 0x00006000},
1453 {0x0000009b, 0x00001000},
1454 {0x0000009f, 0x00b48000}
1458 * cik_srbm_select - select specific register instances
1460 * @rdev: radeon_device pointer
1461 * @me: selected ME (micro engine)
1466 * Switches the currently active registers instances. Some
1467 * registers are instanced per VMID, others are instanced per
1468 * me/pipe/queue combination.
1470 static void cik_srbm_select(struct radeon_device *rdev,
1471 u32 me, u32 pipe, u32 queue, u32 vmid)
1473 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1476 QUEUEID(queue & 0x7));
1477 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1482 * ci_mc_load_microcode - load MC ucode into the hw
1484 * @rdev: radeon_device pointer
1486 * Load the GDDR MC ucode into the hw (CIK).
1487 * Returns 0 on success, error on failure.
1489 static int ci_mc_load_microcode(struct radeon_device *rdev)
1491 const __be32 *fw_data;
1492 u32 running, blackout = 0;
1494 int i, ucode_size, regs_size;
1499 switch (rdev->family) {
1502 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1503 ucode_size = CIK_MC_UCODE_SIZE;
1504 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1508 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1512 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1513 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1516 /* reset the engine and set to writable */
1517 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1518 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1520 /* load mc io regs */
1521 for (i = 0; i < regs_size; i++) {
1522 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1523 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1525 /* load the MC ucode */
1526 fw_data = (const __be32 *)rdev->mc_fw->data;
1527 for (i = 0; i < ucode_size; i++)
1528 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1530 /* put the engine back into the active state */
1531 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1532 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1533 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1535 /* wait for training to complete */
1536 for (i = 0; i < rdev->usec_timeout; i++) {
1537 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1541 for (i = 0; i < rdev->usec_timeout; i++) {
1542 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1548 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1555 * cik_init_microcode - load ucode images from disk
1557 * @rdev: radeon_device pointer
1559 * Use the firmware interface to load the ucode images into
1560 * the driver (not loaded into hw).
1561 * Returns 0 on success, error on failure.
1563 static int cik_init_microcode(struct radeon_device *rdev)
1565 const char *chip_name;
1566 size_t pfp_req_size, me_req_size, ce_req_size,
1567 mec_req_size, rlc_req_size, mc_req_size,
1568 sdma_req_size, smc_req_size;
1574 switch (rdev->family) {
1576 chip_name = "BONAIRE";
1577 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1578 me_req_size = CIK_ME_UCODE_SIZE * 4;
1579 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1580 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1581 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1582 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1583 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1584 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1587 chip_name = "KAVERI";
1588 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1589 me_req_size = CIK_ME_UCODE_SIZE * 4;
1590 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1591 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1592 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1593 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1596 chip_name = "KABINI";
1597 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1598 me_req_size = CIK_ME_UCODE_SIZE * 4;
1599 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1600 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1601 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1602 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1607 DRM_INFO("Loading %s Microcode\n", chip_name);
1609 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1610 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1613 if (rdev->pfp_fw->size != pfp_req_size) {
1615 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1616 rdev->pfp_fw->size, fw_name);
1621 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1622 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1625 if (rdev->me_fw->size != me_req_size) {
1627 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1628 rdev->me_fw->size, fw_name);
1632 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1633 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1636 if (rdev->ce_fw->size != ce_req_size) {
1638 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1639 rdev->ce_fw->size, fw_name);
1643 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1644 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1647 if (rdev->mec_fw->size != mec_req_size) {
1649 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1650 rdev->mec_fw->size, fw_name);
1654 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1655 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1658 if (rdev->rlc_fw->size != rlc_req_size) {
1660 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1661 rdev->rlc_fw->size, fw_name);
1665 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1666 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1669 if (rdev->sdma_fw->size != sdma_req_size) {
1671 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1672 rdev->sdma_fw->size, fw_name);
1676 /* No SMC, MC ucode on APUs */
1677 if (!(rdev->flags & RADEON_IS_IGP)) {
1678 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1679 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1682 if (rdev->mc_fw->size != mc_req_size) {
1684 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1685 rdev->mc_fw->size, fw_name);
1689 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1690 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1693 "smc: error loading firmware \"%s\"\n",
1695 release_firmware(rdev->smc_fw);
1696 rdev->smc_fw = NULL;
1698 } else if (rdev->smc_fw->size != smc_req_size) {
1700 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1701 rdev->smc_fw->size, fw_name);
1710 "cik_cp: Failed to load firmware \"%s\"\n",
1712 release_firmware(rdev->pfp_fw);
1713 rdev->pfp_fw = NULL;
1714 release_firmware(rdev->me_fw);
1716 release_firmware(rdev->ce_fw);
1718 release_firmware(rdev->rlc_fw);
1719 rdev->rlc_fw = NULL;
1720 release_firmware(rdev->mc_fw);
1722 release_firmware(rdev->smc_fw);
1723 rdev->smc_fw = NULL;
1732 * cik_tiling_mode_table_init - init the hw tiling table
1734 * @rdev: radeon_device pointer
1736 * Starting with SI, the tiling setup is done globally in a
1737 * set of 32 tiling modes. Rather than selecting each set of
1738 * parameters per surface as on older asics, we just select
1739 * which index in the tiling table we want to use, and the
1740 * surface uses those parameters (CIK).
1742 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1744 const u32 num_tile_mode_states = 32;
1745 const u32 num_secondary_tile_mode_states = 16;
1746 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1747 u32 num_pipe_configs;
1748 u32 num_rbs = rdev->config.cik.max_backends_per_se *
1749 rdev->config.cik.max_shader_engines;
1751 switch (rdev->config.cik.mem_row_size_in_kb) {
1753 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1757 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1760 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1764 num_pipe_configs = rdev->config.cik.max_tile_pipes;
1765 if (num_pipe_configs > 8)
1766 num_pipe_configs = 8; /* ??? */
1768 if (num_pipe_configs == 8) {
1769 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1770 switch (reg_offset) {
1772 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1773 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1774 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1775 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1778 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1779 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1780 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1781 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1784 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1785 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1786 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1787 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1790 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1791 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1792 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1793 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1796 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1797 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1798 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1799 TILE_SPLIT(split_equal_to_row_size));
1802 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1803 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1806 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1807 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1808 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1809 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1812 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1813 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1814 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1815 TILE_SPLIT(split_equal_to_row_size));
1818 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1819 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1822 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1823 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1826 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1827 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1828 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1832 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1833 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1834 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1835 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1838 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1839 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1840 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1844 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1845 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1848 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1849 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1850 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1851 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1854 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1855 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1856 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1857 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1860 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1861 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1862 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1863 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1866 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1867 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1870 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1871 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1872 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1876 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1877 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1878 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1879 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1882 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1883 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1884 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1885 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1891 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1892 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1894 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1895 switch (reg_offset) {
1897 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1898 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1899 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1900 NUM_BANKS(ADDR_SURF_16_BANK));
1903 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1904 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1905 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1906 NUM_BANKS(ADDR_SURF_16_BANK));
1909 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1910 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1911 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1912 NUM_BANKS(ADDR_SURF_16_BANK));
1915 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1918 NUM_BANKS(ADDR_SURF_16_BANK));
1921 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1922 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1923 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1924 NUM_BANKS(ADDR_SURF_8_BANK));
1927 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1928 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1929 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1930 NUM_BANKS(ADDR_SURF_4_BANK));
1933 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1934 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1935 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1936 NUM_BANKS(ADDR_SURF_2_BANK));
1939 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1940 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1941 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1942 NUM_BANKS(ADDR_SURF_16_BANK));
1945 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1948 NUM_BANKS(ADDR_SURF_16_BANK));
1951 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1952 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1953 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1954 NUM_BANKS(ADDR_SURF_16_BANK));
1957 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1958 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1960 NUM_BANKS(ADDR_SURF_16_BANK));
1963 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1966 NUM_BANKS(ADDR_SURF_8_BANK));
1969 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1970 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1971 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1972 NUM_BANKS(ADDR_SURF_4_BANK));
1975 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1976 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1977 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1978 NUM_BANKS(ADDR_SURF_2_BANK));
1984 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1986 } else if (num_pipe_configs == 4) {
1988 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1989 switch (reg_offset) {
1991 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2003 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2009 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2011 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2012 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2015 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2017 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2018 TILE_SPLIT(split_equal_to_row_size));
2021 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2022 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2025 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2028 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2032 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2033 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2034 TILE_SPLIT(split_equal_to_row_size));
2037 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2038 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2041 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2042 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2045 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2051 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2052 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2053 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2057 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2058 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2059 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2063 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2064 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2067 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2073 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2074 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2080 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2081 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2085 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2086 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2089 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2095 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2097 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2101 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2102 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2103 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2110 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2111 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2113 } else if (num_rbs < 4) {
2114 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2115 switch (reg_offset) {
2117 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2119 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2123 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2125 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2129 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2131 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2135 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2137 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2138 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2141 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2142 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2143 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2144 TILE_SPLIT(split_equal_to_row_size));
2147 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2148 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2151 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2153 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2157 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2159 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2160 TILE_SPLIT(split_equal_to_row_size));
2163 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2164 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2167 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2168 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2171 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2172 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2177 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2178 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2179 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2183 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2184 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2190 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2193 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2194 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2199 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2200 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2205 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2206 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2207 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2211 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2212 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2215 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2216 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2221 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2222 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2223 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2227 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2228 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2229 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2236 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2237 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2240 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2241 switch (reg_offset) {
2243 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2246 NUM_BANKS(ADDR_SURF_16_BANK));
2249 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252 NUM_BANKS(ADDR_SURF_16_BANK));
2255 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 NUM_BANKS(ADDR_SURF_16_BANK));
2261 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264 NUM_BANKS(ADDR_SURF_16_BANK));
2267 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270 NUM_BANKS(ADDR_SURF_16_BANK));
2273 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276 NUM_BANKS(ADDR_SURF_8_BANK));
2279 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2282 NUM_BANKS(ADDR_SURF_4_BANK));
2285 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288 NUM_BANKS(ADDR_SURF_16_BANK));
2291 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294 NUM_BANKS(ADDR_SURF_16_BANK));
2297 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2299 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2300 NUM_BANKS(ADDR_SURF_16_BANK));
2303 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2306 NUM_BANKS(ADDR_SURF_16_BANK));
2309 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312 NUM_BANKS(ADDR_SURF_16_BANK));
2315 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2318 NUM_BANKS(ADDR_SURF_8_BANK));
2321 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2324 NUM_BANKS(ADDR_SURF_4_BANK));
2330 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2332 } else if (num_pipe_configs == 2) {
2333 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2334 switch (reg_offset) {
2336 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 PIPE_CONFIG(ADDR_SURF_P2) |
2339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2342 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344 PIPE_CONFIG(ADDR_SURF_P2) |
2345 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2348 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350 PIPE_CONFIG(ADDR_SURF_P2) |
2351 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2354 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2356 PIPE_CONFIG(ADDR_SURF_P2) |
2357 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2360 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2362 PIPE_CONFIG(ADDR_SURF_P2) |
2363 TILE_SPLIT(split_equal_to_row_size));
2366 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 PIPE_CONFIG(ADDR_SURF_P2) |
2373 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2376 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2377 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2378 PIPE_CONFIG(ADDR_SURF_P2) |
2379 TILE_SPLIT(split_equal_to_row_size));
2382 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2385 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2386 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2389 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391 PIPE_CONFIG(ADDR_SURF_P2) |
2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2395 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2396 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2397 PIPE_CONFIG(ADDR_SURF_P2) |
2398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403 PIPE_CONFIG(ADDR_SURF_P2) |
2404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2407 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2408 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2411 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413 PIPE_CONFIG(ADDR_SURF_P2) |
2414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2419 PIPE_CONFIG(ADDR_SURF_P2) |
2420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2423 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2424 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2425 PIPE_CONFIG(ADDR_SURF_P2) |
2426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2433 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435 PIPE_CONFIG(ADDR_SURF_P2) |
2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2439 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441 PIPE_CONFIG(ADDR_SURF_P2) |
2442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2445 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2446 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2447 PIPE_CONFIG(ADDR_SURF_P2) |
2448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2455 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2457 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2458 switch (reg_offset) {
2460 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2463 NUM_BANKS(ADDR_SURF_16_BANK));
2466 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2469 NUM_BANKS(ADDR_SURF_16_BANK));
2472 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2475 NUM_BANKS(ADDR_SURF_16_BANK));
2478 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2481 NUM_BANKS(ADDR_SURF_16_BANK));
2484 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2487 NUM_BANKS(ADDR_SURF_16_BANK));
2490 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493 NUM_BANKS(ADDR_SURF_16_BANK));
2496 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499 NUM_BANKS(ADDR_SURF_8_BANK));
2502 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505 NUM_BANKS(ADDR_SURF_16_BANK));
2508 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2511 NUM_BANKS(ADDR_SURF_16_BANK));
2514 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517 NUM_BANKS(ADDR_SURF_16_BANK));
2520 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523 NUM_BANKS(ADDR_SURF_16_BANK));
2526 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529 NUM_BANKS(ADDR_SURF_16_BANK));
2532 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2535 NUM_BANKS(ADDR_SURF_16_BANK));
2538 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541 NUM_BANKS(ADDR_SURF_8_BANK));
2547 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2550 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2554 * cik_select_se_sh - select which SE, SH to address
2556 * @rdev: radeon_device pointer
2557 * @se_num: shader engine to address
2558 * @sh_num: sh block to address
2560 * Select which SE, SH combinations to address. Certain
2561 * registers are instanced per SE or SH. 0xffffffff means
2562 * broadcast to all SEs or SHs (CIK).
2564 static void cik_select_se_sh(struct radeon_device *rdev,
2565 u32 se_num, u32 sh_num)
2567 u32 data = INSTANCE_BROADCAST_WRITES;
/* 0xffffffff in either index requests broadcast for that dimension */
2569 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2570 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2571 else if (se_num == 0xffffffff)
2572 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2573 else if (sh_num == 0xffffffff)
2574 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
/* final (else) branch: address one specific SE/SH pair */
2576 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2577 WREG32(GRBM_GFX_INDEX, data);
2581 * cik_create_bitmask - create a bitmask
2583 * @bit_width: length of the mask
2585 * create a variable length bit mask (CIK).
2586 * Returns the bitmask.
2588 static u32 cik_create_bitmask(u32 bit_width)
/* NOTE(review): loop presumably ORs in one bit per iteration to build a
 * mask with bit_width low-order bits set — body truncated in this view,
 * confirm against full source. */
2592 for (i = 0; i < bit_width; i++) {
2600 * cik_get_rb_disabled - compute the disabled RB bitmask
2602 * @rdev: radeon_device pointer
2603 * @max_rb_num: max RBs (render backends) for the asic
2604 * @se_num: number of SEs (shader engines) for the asic
2605 * @sh_per_se: number of SH blocks per SE for the asic
2607 * Calculates the bitmask of disabled RBs (CIK).
2608 * Returns the disabled RB bitmask.
2610 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2611 u32 max_rb_num, u32 se_num,
2616 data = RREG32(CC_RB_BACKEND_DISABLE);
2618 data &= BACKEND_DISABLE_MASK;
/* merge in the user-fused (GC_USER) backend-disable bits as well */
2621 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2623 data >>= BACKEND_DISABLE_SHIFT;
/* limit the result to the RBs belonging to a single SH */
2625 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2631 * cik_setup_rb - setup the RBs on the asic
2633 * @rdev: radeon_device pointer
2634 * @se_num: number of SEs (shader engines) for the asic
2635 * @sh_per_se: number of SH blocks per SE for the asic
2636 * @max_rb_num: max RBs (render backends) for the asic
2638 * Configures per-SE/SH RB registers (CIK).
2640 static void cik_setup_rb(struct radeon_device *rdev,
2641 u32 se_num, u32 sh_per_se,
2646 u32 disabled_rbs = 0;
2647 u32 enabled_rbs = 0;
/* pass 1: collect per-SH disabled-RB bits into one global bitmap */
2649 for (i = 0; i < se_num; i++) {
2650 for (j = 0; j < sh_per_se; j++) {
2651 cik_select_se_sh(rdev, i, j);
2652 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2653 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
/* restore broadcast addressing after per-SH reads */
2656 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
/* pass 2: invert the disabled bitmap into an enabled bitmap */
2659 for (i = 0; i < max_rb_num; i++) {
2660 if (!(disabled_rbs & mask))
2661 enabled_rbs |= mask;
/* pass 3: program PA_SC_RASTER_CONFIG per SE from the enabled map */
2665 for (i = 0; i < se_num; i++) {
2666 cik_select_se_sh(rdev, i, 0xffffffff);
2668 for (j = 0; j < sh_per_se; j++) {
/* two enabled-RB bits per SH choose the raster RB mapping */
2669 switch (enabled_rbs & 3) {
2671 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2674 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2678 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2683 WREG32(PA_SC_RASTER_CONFIG, data);
2685 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2689 * cik_gpu_init - setup the 3D engine
2691 * @rdev: radeon_device pointer
2693 * Configures the 3D engine and tiling configuration
2694 * registers so that the 3D engine is usable.
2696 static void cik_gpu_init(struct radeon_device *rdev)
2698 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2699 u32 mc_shared_chmap, mc_arb_ramcfg;
2700 u32 hdp_host_path_cntl;
/* per-family shader/pipe/cache limits and golden GB_ADDR_CONFIG */
2704 switch (rdev->family) {
2706 rdev->config.cik.max_shader_engines = 2;
2707 rdev->config.cik.max_tile_pipes = 4;
2708 rdev->config.cik.max_cu_per_sh = 7;
2709 rdev->config.cik.max_sh_per_se = 1;
2710 rdev->config.cik.max_backends_per_se = 2;
2711 rdev->config.cik.max_texture_channel_caches = 4;
2712 rdev->config.cik.max_gprs = 256;
2713 rdev->config.cik.max_gs_threads = 32;
2714 rdev->config.cik.max_hw_contexts = 8;
2716 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2717 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2718 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2719 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2720 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2723 rdev->config.cik.max_shader_engines = 1;
2724 rdev->config.cik.max_tile_pipes = 4;
/* KV: PCI device id selects the CU-per-SH / backends-per-SE counts */
2725 if ((rdev->pdev->device == 0x1304) ||
2726 (rdev->pdev->device == 0x1305) ||
2727 (rdev->pdev->device == 0x130C) ||
2728 (rdev->pdev->device == 0x130F) ||
2729 (rdev->pdev->device == 0x1310) ||
2730 (rdev->pdev->device == 0x1311) ||
2731 (rdev->pdev->device == 0x131C)) {
2732 rdev->config.cik.max_cu_per_sh = 8;
2733 rdev->config.cik.max_backends_per_se = 2;
2734 } else if ((rdev->pdev->device == 0x1309) ||
2735 (rdev->pdev->device == 0x130A) ||
2736 (rdev->pdev->device == 0x130D) ||
2737 (rdev->pdev->device == 0x1313) ||
2738 (rdev->pdev->device == 0x131D)) {
2739 rdev->config.cik.max_cu_per_sh = 6;
2740 rdev->config.cik.max_backends_per_se = 2;
2741 } else if ((rdev->pdev->device == 0x1306) ||
2742 (rdev->pdev->device == 0x1307) ||
2743 (rdev->pdev->device == 0x130B) ||
2744 (rdev->pdev->device == 0x130E) ||
2745 (rdev->pdev->device == 0x1315) ||
2746 (rdev->pdev->device == 0x131B)) {
2747 rdev->config.cik.max_cu_per_sh = 4;
2748 rdev->config.cik.max_backends_per_se = 1;
/* fallback for unlisted KV device ids */
2750 rdev->config.cik.max_cu_per_sh = 3;
2751 rdev->config.cik.max_backends_per_se = 1;
2753 rdev->config.cik.max_sh_per_se = 1;
2754 rdev->config.cik.max_texture_channel_caches = 4;
2755 rdev->config.cik.max_gprs = 256;
2756 rdev->config.cik.max_gs_threads = 16;
2757 rdev->config.cik.max_hw_contexts = 8;
2759 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2760 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2761 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2762 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2763 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2767 rdev->config.cik.max_shader_engines = 1;
2768 rdev->config.cik.max_tile_pipes = 2;
2769 rdev->config.cik.max_cu_per_sh = 2;
2770 rdev->config.cik.max_sh_per_se = 1;
2771 rdev->config.cik.max_backends_per_se = 1;
2772 rdev->config.cik.max_texture_channel_caches = 2;
2773 rdev->config.cik.max_gprs = 256;
2774 rdev->config.cik.max_gs_threads = 16;
2775 rdev->config.cik.max_hw_contexts = 8;
2777 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2778 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2779 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2780 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2781 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2785 /* Initialize HDP */
2786 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2787 WREG32((0x2c14 + j), 0x00000000);
2788 WREG32((0x2c18 + j), 0x00000000);
2789 WREG32((0x2c1c + j), 0x00000000);
2790 WREG32((0x2c20 + j), 0x00000000);
2791 WREG32((0x2c24 + j), 0x00000000);
2794 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2796 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2798 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2799 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2801 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2802 rdev->config.cik.mem_max_burst_length_bytes = 256;
/* derive DRAM row size (in KB) from MC_ARB_RAMCFG column count, cap at 4 */
2803 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2804 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2805 if (rdev->config.cik.mem_row_size_in_kb > 4)
2806 rdev->config.cik.mem_row_size_in_kb = 4;
2807 /* XXX use MC settings? */
2808 rdev->config.cik.shader_engine_tile_size = 32;
2809 rdev->config.cik.num_gpus = 1;
2810 rdev->config.cik.multi_gpu_tile_size = 64;
2812 /* fix up row size */
2813 gb_addr_config &= ~ROW_SIZE_MASK;
2814 switch (rdev->config.cik.mem_row_size_in_kb) {
2817 gb_addr_config |= ROW_SIZE(0);
2820 gb_addr_config |= ROW_SIZE(1);
2823 gb_addr_config |= ROW_SIZE(2);
2827 /* setup tiling info dword. gb_addr_config is not adequate since it does
2828 * not have bank info, so create a custom tiling dword.
2829 * bits 3:0 num_pipes
2830 * bits 7:4 num_banks
2831 * bits 11:8 group_size
2832 * bits 15:12 row_size
2834 rdev->config.cik.tile_config = 0;
2835 switch (rdev->config.cik.num_tile_pipes) {
2837 rdev->config.cik.tile_config |= (0 << 0);
2840 rdev->config.cik.tile_config |= (1 << 0);
2843 rdev->config.cik.tile_config |= (2 << 0);
2847 /* XXX what about 12? */
2848 rdev->config.cik.tile_config |= (3 << 0);
2851 rdev->config.cik.tile_config |=
2852 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
2853 rdev->config.cik.tile_config |=
2854 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2855 rdev->config.cik.tile_config |=
2856 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
/* propagate the address config to every block that consumes it */
2858 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2859 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2860 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2861 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2862 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2863 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2864 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2865 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2867 cik_tiling_mode_table_init(rdev);
2869 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2870 rdev->config.cik.max_sh_per_se,
2871 rdev->config.cik.max_backends_per_se);
2873 /* set HW defaults for 3D engine */
2874 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2876 WREG32(SX_DEBUG_1, 0x20);
2878 WREG32(TA_CNTL_AUX, 0x00010000);
2880 tmp = RREG32(SPI_CONFIG_CNTL);
2882 WREG32(SPI_CONFIG_CNTL, tmp);
2884 WREG32(SQ_CONFIG, 1);
2886 WREG32(DB_DEBUG, 0);
2888 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2890 WREG32(DB_DEBUG2, tmp);
2892 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2894 WREG32(DB_DEBUG3, tmp);
2896 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2898 WREG32(CB_HW_CONTROL, tmp);
2900 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2902 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2903 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2904 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2905 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2907 WREG32(VGT_NUM_INSTANCES, 1);
2909 WREG32(CP_PERFMON_CNTL, 0);
2911 WREG32(SQ_CONFIG, 0);
2913 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2914 FORCE_EOV_MAX_REZ_CNT(255)));
2916 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2917 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2919 WREG32(VGT_GS_VERTEX_REUSE, 16);
2920 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2922 tmp = RREG32(HDP_MISC_CNTL);
2923 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2924 WREG32(HDP_MISC_CNTL, tmp);
/* read-modify-write keeps the BIOS-programmed host path bits */
2926 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2927 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2929 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2930 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2936 * GPU scratch registers helpers function.
2939 * cik_scratch_init - setup driver info for CP scratch regs
2941 * @rdev: radeon_device pointer
2943 * Set up the number and offset of the CP scratch registers.
2944 * NOTE: use of CP scratch registers is a legacy interface and
2945 * is not used by default on newer asics (r6xx+). On newer asics,
2946 * memory buffers are used for fences rather than scratch regs.
2948 static void cik_scratch_init(struct radeon_device *rdev)
/* 7 registers starting at SCRATCH_REG0, 4 bytes apart; all free initially */
2952 rdev->scratch.num_reg = 7;
2953 rdev->scratch.reg_base = SCRATCH_REG0;
2954 for (i = 0; i < rdev->scratch.num_reg; i++) {
2955 rdev->scratch.free[i] = true;
2956 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2961 * cik_ring_test - basic gfx ring test
2963 * @rdev: radeon_device pointer
2964 * @ring: radeon_ring structure holding ring information
2966 * Allocate a scratch register and write to it using the gfx ring (CIK).
2967 * Provides a basic gfx ring test to verify that the ring is working.
2968 * Used by cik_cp_gfx_resume();
2969 * Returns 0 on success, error on failure.
2971 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2978 r = radeon_scratch_get(rdev, &scratch);
2980 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
/* seed the scratch reg with a sentinel the CP must overwrite */
2983 WREG32(scratch, 0xCAFEDEAD);
2984 r = radeon_ring_lock(rdev, ring, 3);
2986 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2987 radeon_scratch_free(rdev, scratch);
/* emit a SET_UCONFIG_REG packet that writes 0xDEADBEEF to the scratch reg */
2990 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2991 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2992 radeon_ring_write(ring, 0xDEADBEEF);
2993 radeon_ring_unlock_commit(rdev, ring);
/* poll (up to usec_timeout) until the CP writes the magic value back */
2995 for (i = 0; i < rdev->usec_timeout; i++) {
2996 tmp = RREG32(scratch);
2997 if (tmp == 0xDEADBEEF)
3001 if (i < rdev->usec_timeout) {
3002 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3004 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3005 ring->idx, scratch, tmp);
3008 radeon_scratch_free(rdev, scratch);
3013 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3015 * @rdev: radeon_device pointer
3016 * @fence: radeon fence object
3018 * Emits a fence sequence number on the gfx ring and flushes
3021 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3022 struct radeon_fence *fence)
3024 struct radeon_ring *ring = &rdev->ring[fence->ring];
3025 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3027 /* EVENT_WRITE_EOP - flush caches, send int */
3028 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3029 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3031 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* fence GPU address must be 4-byte aligned; low bits carried in next dword */
3033 radeon_ring_write(ring, addr & 0xfffffffc);
3034 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3035 radeon_ring_write(ring, fence->seq);
3036 radeon_ring_write(ring, 0);
3038 /* We should be using the new WAIT_REG_MEM special op packet here
3039 * but it causes the CP to hang
/* HDP flush via WRITE_DATA to HDP_MEM_COHERENCY_FLUSH_CNTL instead */
3041 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3042 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3043 WRITE_DATA_DST_SEL(0)));
3044 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3045 radeon_ring_write(ring, 0);
3046 radeon_ring_write(ring, 0);
3050 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3052 * @rdev: radeon_device pointer
3053 * @fence: radeon fence object
3055 * Emits a fence sequence number on the compute ring and flushes
3058 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3059 struct radeon_fence *fence)
3061 struct radeon_ring *ring = &rdev->ring[fence->ring];
3062 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3064 /* RELEASE_MEM - flush caches, send int */
3065 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3066 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3068 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* unlike the gfx EOP packet, RELEASE_MEM takes sel dword before the address */
3070 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3071 radeon_ring_write(ring, addr & 0xfffffffc);
3072 radeon_ring_write(ring, upper_32_bits(addr));
3073 radeon_ring_write(ring, fence->seq);
3074 radeon_ring_write(ring, 0);
3076 /* We should be using the new WAIT_REG_MEM special op packet here
3077 * but it causes the CP to hang
/* HDP flush via WRITE_DATA to HDP_MEM_COHERENCY_FLUSH_CNTL instead */
3079 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3080 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3081 WRITE_DATA_DST_SEL(0)));
3082 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3083 radeon_ring_write(ring, 0);
3084 radeon_ring_write(ring, 0);
/* cik_semaphore_ring_emit - emit a semaphore wait or signal packet.
 * emit_wait selects WAIT vs SIGNAL semantics for the MEM_SEMAPHORE packet.
 */
3087 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3088 struct radeon_ring *ring,
3089 struct radeon_semaphore *semaphore,
3092 uint64_t addr = semaphore->gpu_addr;
3093 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3095 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3096 radeon_ring_write(ring, addr & 0xffffffff);
/* upper 16 address bits share a dword with the wait/signal select */
3097 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3104 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3106 * @rdev: radeon_device pointer
3107 * @ib: radeon indirect buffer object
3109 * Emits a DE (drawing engine) or CE (constant engine) IB
3110 * on the gfx ring. IBs are usually generated by userspace
3111 * acceleration drivers and submitted to the kernel for
3112 * scheduling on the ring. This function schedules the IB
3113 * on the gfx ring for execution by the GPU.
3115 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3117 struct radeon_ring *ring = &rdev->ring[ib->ring];
3118 u32 header, control = INDIRECT_BUFFER_VALID;
3120 if (ib->is_const_ib) {
3121 /* set switch buffer packet before const IB */
3122 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3123 radeon_ring_write(ring, 0);
3125 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
/* record the post-IB rptr so GPU-reset code can see how far we got */
3128 if (ring->rptr_save_reg) {
3129 next_rptr = ring->wptr + 3 + 4;
3130 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3131 radeon_ring_write(ring, ((ring->rptr_save_reg -
3132 PACKET3_SET_UCONFIG_REG_START) >> 2));
3133 radeon_ring_write(ring, next_rptr);
3134 } else if (rdev->wb.enabled) {
3135 next_rptr = ring->wptr + 5 + 4;
3136 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3137 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3138 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3139 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3140 radeon_ring_write(ring, next_rptr);
3143 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
/* length plus the VMID (bits 31:24) the IB executes under */
3146 control |= ib->length_dw |
3147 (ib->vm ? (ib->vm->id << 24) : 0);
3149 radeon_ring_write(ring, header);
3150 radeon_ring_write(ring,
3154 (ib->gpu_addr & 0xFFFFFFFC));
3155 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3156 radeon_ring_write(ring, control);
3160 * cik_ib_test - basic gfx ring IB test
3162 * @rdev: radeon_device pointer
3163 * @ring: radeon_ring structure holding ring information
3165 * Allocate an IB and execute it on the gfx ring (CIK).
3166 * Provides a basic gfx ring test to verify that IBs are working.
3167 * Returns 0 on success, error on failure.
3169 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3171 struct radeon_ib ib;
3177 r = radeon_scratch_get(rdev, &scratch);
3179 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
/* sentinel the IB must overwrite via SET_UCONFIG_REG */
3182 WREG32(scratch, 0xCAFEDEAD);
3183 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3185 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3186 radeon_scratch_free(rdev, scratch);
3189 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3190 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3191 ib.ptr[2] = 0xDEADBEEF;
3193 r = radeon_ib_schedule(rdev, &ib, NULL);
3195 radeon_scratch_free(rdev, scratch);
3196 radeon_ib_free(rdev, &ib);
3197 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
/* wait for the IB's fence, then poll the scratch reg for the magic value */
3200 r = radeon_fence_wait(ib.fence, false);
3202 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3203 radeon_scratch_free(rdev, scratch);
3204 radeon_ib_free(rdev, &ib);
3207 for (i = 0; i < rdev->usec_timeout; i++) {
3208 tmp = RREG32(scratch);
3209 if (tmp == 0xDEADBEEF)
3213 if (i < rdev->usec_timeout) {
3214 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3216 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3220 radeon_scratch_free(rdev, scratch);
3221 radeon_ib_free(rdev, &ib);
3227 * On CIK, gfx and compute now have independent command processors.
3230 * Gfx consists of a single ring and can process both gfx jobs and
3231 * compute jobs. The gfx CP consists of three microengines (ME):
3232 * PFP - Pre-Fetch Parser
3234 * CE - Constant Engine
3235 * The PFP and ME make up what is considered the Drawing Engine (DE).
3236 * The CE is an asynchronous engine used for updating buffer descriptors
3237 * used by the DE so that they can be loaded into cache in parallel
3238 * while the DE is processing state update packets.
3241 * The compute CP consists of two microengines (ME):
3242 * MEC1 - Compute MicroEngine 1
3243 * MEC2 - Compute MicroEngine 2
3244 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3245 * The queues are exposed to userspace and are programmed directly
3246 * by the compute runtime.
3249 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3251 * @rdev: radeon_device pointer
3252 * @enable: enable or disable the MEs
3254 * Halts or unhalts the gfx MEs.
3256 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
/* writing 0 releases all halt bits; otherwise halt PFP, ME and CE */
3259 WREG32(CP_ME_CNTL, 0);
3261 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
/* mark the gfx ring unusable while the MEs are halted */
3262 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3268 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3270 * @rdev: radeon_device pointer
3272 * Loads the gfx PFP, ME, and CE ucode.
3273 * Returns 0 for success, -EINVAL if the ucode is not available.
3275 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3277 const __be32 *fw_data;
/* all three firmware images must already be fetched by the fw loader */
3280 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
/* halt the MEs before rewriting their instruction RAM */
3283 cik_cp_gfx_enable(rdev, false);
/* firmware blobs are big-endian; convert each dword on write */
3286 fw_data = (const __be32 *)rdev->pfp_fw->data;
3287 WREG32(CP_PFP_UCODE_ADDR, 0);
3288 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3289 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3290 WREG32(CP_PFP_UCODE_ADDR, 0);
3293 fw_data = (const __be32 *)rdev->ce_fw->data;
3294 WREG32(CP_CE_UCODE_ADDR, 0);
3295 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3296 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3297 WREG32(CP_CE_UCODE_ADDR, 0);
3300 fw_data = (const __be32 *)rdev->me_fw->data;
3301 WREG32(CP_ME_RAM_WADDR, 0);
3302 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3303 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3304 WREG32(CP_ME_RAM_WADDR, 0);
/* reset all ucode address pointers back to the start */
3306 WREG32(CP_PFP_UCODE_ADDR, 0);
3307 WREG32(CP_CE_UCODE_ADDR, 0);
3308 WREG32(CP_ME_RAM_WADDR, 0);
3309 WREG32(CP_ME_RAM_RADDR, 0);
3314 * cik_cp_gfx_start - start the gfx ring
3316 * @rdev: radeon_device pointer
3318 * Enables the ring and loads the clear state context and other
3319 * packets required to init the ring.
3320 * Returns 0 for success, error for failure.
3322 static int cik_cp_gfx_start(struct radeon_device *rdev)
3324 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3328 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3329 WREG32(CP_ENDIAN_SWAP, 0);
3330 WREG32(CP_DEVICE_ID, 1);
3332 cik_cp_gfx_enable(rdev, true);
/* reserve space for the default state plus 17 fixed init dwords */
3334 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3336 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3340 /* init the CE partitions. CE only used for gfx on CIK */
3341 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3342 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3343 radeon_ring_write(ring, 0xc000);
3344 radeon_ring_write(ring, 0xc000);
3346 /* setup clear context state */
3347 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3348 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3350 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3351 radeon_ring_write(ring, 0x80000000);
3352 radeon_ring_write(ring, 0x80000000);
/* stream the golden default register state from clearstate_ci.h */
3354 for (i = 0; i < cik_default_size; i++)
3355 radeon_ring_write(ring, cik_default_state[i]);
3357 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3358 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3360 /* set clear context state */
3361 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3362 radeon_ring_write(ring, 0);
3364 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3365 radeon_ring_write(ring, 0x00000316);
3366 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3367 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3369 radeon_ring_unlock_commit(rdev, ring);
3375 * cik_cp_gfx_fini - stop the gfx ring
3377 * @rdev: radeon_device pointer
3379 * Stop the gfx ring and tear down the driver ring
3382 static void cik_cp_gfx_fini(struct radeon_device *rdev)
/* halt the gfx MEs before freeing the ring backing storage */
3384 cik_cp_gfx_enable(rdev, false);
3385 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3389 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3391 * @rdev: radeon_device pointer
3393 * Program the location and size of the gfx ring buffer
3394 * and test it to make sure it's working.
3395 * Returns 0 for success, error for failure.
3397 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3399 struct radeon_ring *ring;
3405 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3406 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3408 /* Set the write pointer delay */
3409 WREG32(CP_RB_WPTR_DELAY, 0);
3411 /* set the RB to use vmid 0 */
3412 WREG32(CP_RB_VMID, 0);
3414 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3416 /* ring 0 - compute and gfx */
3417 /* Set ring buffer size */
3418 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
/* CNTL wants log2 sizes: ring size in dwords and rptr-update block size */
3419 rb_bufsz = order_base_2(ring->ring_size / 8);
3420 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3422 tmp |= BUF_SWAP_32BIT;
3424 WREG32(CP_RB0_CNTL, tmp);
3426 /* Initialize the ring buffer's read and write pointers */
3427 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3429 WREG32(CP_RB0_WPTR, ring->wptr);
3431 /* set the wb address whether it's enabled or not */
3432 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3433 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3435 /* scratch register shadowing is no longer supported */
3436 WREG32(SCRATCH_UMSK, 0);
3438 if (!rdev->wb.enabled)
3439 tmp |= RB_NO_UPDATE;
3442 WREG32(CP_RB0_CNTL, tmp);
/* ring base is 256-byte aligned; register takes address >> 8 */
3444 rb_addr = ring->gpu_addr >> 8;
3445 WREG32(CP_RB0_BASE, rb_addr);
3446 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3448 ring->rptr = RREG32(CP_RB0_RPTR);
3450 /* start the ring */
3451 cik_cp_gfx_start(rdev);
3452 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3453 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3455 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
/* cik_compute_ring_get_rptr - fetch the compute ring read pointer.
 * Fast path reads the writeback buffer; otherwise select the ring's
 * HQD via SRBM (under srbm_mutex) and read CP_HQD_PQ_RPTR directly.
 */
3461 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3462 struct radeon_ring *ring)
3468 if (rdev->wb.enabled) {
3469 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3471 mutex_lock(&rdev->srbm_mutex);
3472 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3473 rptr = RREG32(CP_HQD_PQ_RPTR);
/* deselect back to the default SRBM state before unlocking */
3474 cik_srbm_select(rdev, 0, 0, 0, 0);
3475 mutex_unlock(&rdev->srbm_mutex);
/* cik_compute_ring_get_wptr - fetch the compute ring write pointer.
 * Mirrors cik_compute_ring_get_rptr(): writeback fast path, else
 * SRBM-selected read of CP_HQD_PQ_WPTR under srbm_mutex.
 */
3481 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3482 struct radeon_ring *ring)
3486 if (rdev->wb.enabled) {
3487 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3489 mutex_lock(&rdev->srbm_mutex);
3490 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3491 wptr = RREG32(CP_HQD_PQ_WPTR);
3492 cik_srbm_select(rdev, 0, 0, 0, 0);
3493 mutex_unlock(&rdev->srbm_mutex);
/* cik_compute_ring_set_wptr - publish a new compute ring write pointer.
 * Updates the writeback shadow copy, then rings the queue's doorbell
 * so the CP picks up the new wptr.
 */
3499 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3500 struct radeon_ring *ring)
3502 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3503 WDOORBELL32(ring->doorbell_offset, ring->wptr);
3507 * cik_cp_compute_enable - enable/disable the compute CP MEs
3509 * @rdev: radeon_device pointer
3510 * @enable: enable or disable the MEs
3512 * Halts or unhalts the compute MEs.
3514 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
/* 0 clears the halt bits; otherwise halt both compute microengines */
3517 WREG32(CP_MEC_CNTL, 0);
3519 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3524 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3526 * @rdev: radeon_device pointer
3528 * Loads the compute MEC1&2 ucode.
3529 * Returns 0 for success, -EINVAL if the ucode is not available.
3531 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3533 const __be32 *fw_data;
/* halt the MECs before rewriting their instruction RAM */
3539 cik_cp_compute_enable(rdev, false);
/* firmware is big-endian; convert dword by dword on write */
3542 fw_data = (const __be32 *)rdev->mec_fw->data;
3543 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3544 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3545 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3546 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
/* only Kaveri has a second MEC; it reuses the same mec_fw image */
3548 if (rdev->family == CHIP_KAVERI) {
3550 fw_data = (const __be32 *)rdev->mec_fw->data;
3551 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3552 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3553 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3554 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3561 * cik_cp_compute_start - start the compute queues
3563 * @rdev: radeon_device pointer
3565 * Enable the compute queues.
3566 * Returns 0 for success, error for failure.
3568 static int cik_cp_compute_start(struct radeon_device *rdev)
/* unhalt the MECs; actual per-queue setup happens in cik_cp_compute_resume() */
3570 cik_cp_compute_enable(rdev, true);
3576 * cik_cp_compute_fini - stop the compute queues
3578 * @rdev: radeon_device pointer
3580 * Stop the compute queues and tear down the driver queue
3583 static void cik_cp_compute_fini(struct radeon_device *rdev)
3587 cik_cp_compute_enable(rdev, false);
/* two kernel compute rings are used (CP1/CP2) */
3589 for (i = 0; i < 2; i++) {
3591 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3593 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3595 if (rdev->ring[idx].mqd_obj) {
3596 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3597 if (unlikely(r != 0))
3598 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
/* unpin/unreserve only make sense after a successful reserve above */
3600 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3601 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
/* drop the last reference and clear the pointer to avoid reuse */
3603 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3604 rdev->ring[idx].mqd_obj = NULL;
/*
 * cik_mec_fini - free the MEC HPD EOP buffer object.
 *
 * Reserve, unpin, unreserve, then drop the reference; pointer is cleared
 * so a later cik_mec_init() will reallocate.
 */
3609 static void cik_mec_fini(struct radeon_device *rdev)
3613 if (rdev->mec.hpd_eop_obj) {
3614 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3615 if (unlikely(r != 0))
3616 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3617 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3618 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3620 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3621 rdev->mec.hpd_eop_obj = NULL;
3625 #define MEC_HPD_SIZE 2048
/*
 * cik_mec_init - set up MEC topology and allocate the HPD EOP buffer.
 *
 * Records the number of MECs/pipes/queues for this ASIC, then allocates,
 * pins (GTT), maps and zeroes one MEC_HPD_SIZE*2 region per pipe for the
 * hardware pipe descriptor (HPD) end-of-pipe buffers.
 * Returns 0 for success, error for failure.
 */
3627 static int cik_mec_init(struct radeon_device *rdev)
3633 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3634 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3636 if (rdev->family == CHIP_KAVERI)
3637 rdev->mec.num_mec = 2;
3639 rdev->mec.num_mec = 1;
3640 rdev->mec.num_pipe = 4;
/* 8 queues per pipe on CIK parts */
3641 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3643 if (rdev->mec.hpd_eop_obj == NULL) {
3644 r = radeon_bo_create(rdev,
3645 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3647 RADEON_GEM_DOMAIN_GTT, NULL,
3648 &rdev->mec.hpd_eop_obj);
/* NOTE(review): message says "HDP" but this is the HPD EOP bo — typo in the
 * warning strings below; left as-is (runtime string). */
3650 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3655 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3656 if (unlikely(r != 0)) {
3660 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3661 &rdev->mec.hpd_eop_gpu_addr);
3663 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3667 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3669 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3674 /* clear memory. Not sure if this is required or not */
3675 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3677 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3678 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
/*
 * struct hqd_registers - CPU-side shadow of the CP_HQD_* / CP_MQD_* register
 * state for one hardware queue descriptor.  Filled in by
 * cik_cp_compute_resume() and embedded in the MQD so the CP can restore
 * queue state; field names mirror the corresponding hardware registers.
 */
3683 struct hqd_registers
3685 u32 cp_mqd_base_addr;
3686 u32 cp_mqd_base_addr_hi;
3689 u32 cp_hqd_persistent_state;
3690 u32 cp_hqd_pipe_priority;
3691 u32 cp_hqd_queue_priority;
3694 u32 cp_hqd_pq_base_hi;
3696 u32 cp_hqd_pq_rptr_report_addr;
3697 u32 cp_hqd_pq_rptr_report_addr_hi;
3698 u32 cp_hqd_pq_wptr_poll_addr;
3699 u32 cp_hqd_pq_wptr_poll_addr_hi;
3700 u32 cp_hqd_pq_doorbell_control;
3702 u32 cp_hqd_pq_control;
3703 u32 cp_hqd_ib_base_addr;
3704 u32 cp_hqd_ib_base_addr_hi;
3706 u32 cp_hqd_ib_control;
3707 u32 cp_hqd_iq_timer;
3709 u32 cp_hqd_dequeue_request;
3710 u32 cp_hqd_dma_offload;
3711 u32 cp_hqd_sema_cmd;
3712 u32 cp_hqd_msg_type;
3713 u32 cp_hqd_atomic0_preop_lo;
3714 u32 cp_hqd_atomic0_preop_hi;
3715 u32 cp_hqd_atomic1_preop_lo;
3716 u32 cp_hqd_atomic1_preop_hi;
3717 u32 cp_hqd_hq_scheduler0;
3718 u32 cp_hqd_hq_scheduler1;
/* Fields of the Bonaire memory queue descriptor (MQD) — presumably members
 * of struct bonaire_mqd, whose opening declaration lies above this span;
 * TODO confirm against the full file.  The MQD is the in-memory image the
 * CP uses to save/restore a compute queue's state. */
3725 u32 dispatch_initiator;
3729 u32 pipeline_stat_enable;
3730 u32 perf_counter_enable;
3736 u32 resource_limits;
/* CU enable masks for SE0/SE1; set to all-ones by cik_cp_compute_resume() */
3737 u32 static_thread_mgmt01[2];
3739 u32 static_thread_mgmt23[2];
3741 u32 thread_trace_enable;
3744 u32 vgtcs_invoke_count[2];
/* snapshot of the hardware queue descriptor registers for this queue */
3745 struct hqd_registers queue_state;
3747 u32 interrupt_queue[64];
3751 * cik_cp_compute_resume - setup the compute queue registers
3753 * @rdev: radeon_device pointer
3755 * Program the compute queues and test them to make sure they
3757 * Returns 0 for success, error for failure.
3759 static int cik_cp_compute_resume(struct radeon_device *rdev)
3763 bool use_doorbell = true;
3769 struct bonaire_mqd *mqd;
3771 r = cik_cp_compute_start(rdev);
3775 /* fix up chicken bits */
3776 tmp = RREG32(CP_CPF_DEBUG);
3778 WREG32(CP_CPF_DEBUG, tmp);
3780 /* init the pipes */
3781 mutex_lock(&rdev->srbm_mutex);
3782 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
/* pipes 0-3 live on ME1, pipes 4-7 on ME2 (Kaveri only) */
3783 int me = (i < 4) ? 1 : 2;
3784 int pipe = (i < 4) ? i : (i - 4);
/* each pipe gets a MEC_HPD_SIZE*2 slice of the shared HPD EOP bo */
3786 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3788 cik_srbm_select(rdev, me, pipe, 0, 0);
3790 /* write the EOP addr */
3791 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3792 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3794 /* set the VMID assigned */
3795 WREG32(CP_HPD_EOP_VMID, 0);
3797 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3798 tmp = RREG32(CP_HPD_EOP_CONTROL);
3799 tmp &= ~EOP_SIZE_MASK;
3800 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3801 WREG32(CP_HPD_EOP_CONTROL, tmp);
3803 cik_srbm_select(rdev, 0, 0, 0, 0);
3804 mutex_unlock(&rdev->srbm_mutex);
3806 /* init the queues. Just two for now. */
3807 for (i = 0; i < 2; i++) {
3809 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3811 idx = CAYMAN_RING_TYPE_CP2_INDEX;
/* lazily allocate the per-ring MQD bo on first resume */
3813 if (rdev->ring[idx].mqd_obj == NULL) {
3814 r = radeon_bo_create(rdev,
3815 sizeof(struct bonaire_mqd),
3817 RADEON_GEM_DOMAIN_GTT, NULL,
3818 &rdev->ring[idx].mqd_obj);
3820 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3825 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3826 if (unlikely(r != 0)) {
3827 cik_cp_compute_fini(rdev);
3830 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3833 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3834 cik_cp_compute_fini(rdev);
3837 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3839 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3840 cik_cp_compute_fini(rdev);
3844 /* doorbell offset */
3845 rdev->ring[idx].doorbell_offset =
3846 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3848 /* init the mqd struct */
3849 memset(buf, 0, sizeof(struct bonaire_mqd));
3851 mqd = (struct bonaire_mqd *)buf;
3852 mqd->header = 0xC0310800;
/* enable all CUs on all shader engines for this queue */
3853 mqd->static_thread_mgmt01[0] = 0xffffffff;
3854 mqd->static_thread_mgmt01[1] = 0xffffffff;
3855 mqd->static_thread_mgmt23[0] = 0xffffffff;
3856 mqd->static_thread_mgmt23[1] = 0xffffffff;
3858 mutex_lock(&rdev->srbm_mutex);
3859 cik_srbm_select(rdev, rdev->ring[idx].me,
3860 rdev->ring[idx].pipe,
3861 rdev->ring[idx].queue, 0);
3863 /* disable wptr polling */
3864 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3865 tmp &= ~WPTR_POLL_EN;
3866 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3868 /* enable doorbell? */
3869 mqd->queue_state.cp_hqd_pq_doorbell_control =
3870 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3872 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3874 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3875 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3876 mqd->queue_state.cp_hqd_pq_doorbell_control);
3878 /* disable the queue if it's active */
3879 mqd->queue_state.cp_hqd_dequeue_request = 0;
3880 mqd->queue_state.cp_hqd_pq_rptr = 0;
3881 mqd->queue_state.cp_hqd_pq_wptr= 0;
3882 if (RREG32(CP_HQD_ACTIVE) & 1) {
3883 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
/* NOTE(review): this wait loop reuses 'i', the outer queue-loop counter —
 * after the wait, the outer for(i < 2) index is clobbered, so the second
 * queue may be skipped or re-processed.  Should use a separate variable
 * (e.g. 'j'); fix requires seeing the elided declarations — TODO confirm
 * against the full file. */
3884 for (i = 0; i < rdev->usec_timeout; i++) {
3885 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3889 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3890 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3891 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3894 /* set the pointer to the MQD */
3895 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3896 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3897 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3898 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3899 /* set MQD vmid to 0 */
3900 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3901 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3902 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3904 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3905 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3906 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3907 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3908 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3909 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3911 /* set up the HQD, this is similar to CP_RB0_CNTL */
3912 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3913 mqd->queue_state.cp_hqd_pq_control &=
3914 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
/* queue size field is log2 of the size in dwords */
3916 mqd->queue_state.cp_hqd_pq_control |=
3917 order_base_2(rdev->ring[idx].ring_size / 8);
3918 mqd->queue_state.cp_hqd_pq_control |=
3919 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3921 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3923 mqd->queue_state.cp_hqd_pq_control &=
3924 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3925 mqd->queue_state.cp_hqd_pq_control |=
3926 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3927 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3929 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3931 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3933 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3934 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3935 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3936 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3937 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3938 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3940 /* set the wb address wether it's enabled or not */
3942 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3944 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3945 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3946 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3947 upper_32_bits(wb_gpu_addr) & 0xffff;
3948 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3949 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3950 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3951 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3953 /* enable the doorbell if requested */
3955 mqd->queue_state.cp_hqd_pq_doorbell_control =
3956 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3957 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
/* doorbell offset is expressed in dwords in the register field */
3958 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3959 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3960 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3961 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3962 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3965 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3967 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3968 mqd->queue_state.cp_hqd_pq_doorbell_control);
3970 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3971 rdev->ring[idx].wptr = 0;
3972 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3973 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3974 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3975 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3977 /* set the vmid for the queue */
3978 mqd->queue_state.cp_hqd_vmid = 0;
3979 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3981 /* activate the queue */
3982 mqd->queue_state.cp_hqd_active = 1;
3983 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3985 cik_srbm_select(rdev, 0, 0, 0, 0);
3986 mutex_unlock(&rdev->srbm_mutex);
3988 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3989 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
/* sanity-test the freshly programmed queue; mark unusable on failure */
3991 rdev->ring[idx].ready = true;
3992 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3994 rdev->ring[idx].ready = false;
/* cik_cp_enable - enable/disable both the gfx and compute command processors. */
4000 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4002 cik_cp_gfx_enable(rdev, enable);
4003 cik_cp_compute_enable(rdev, enable);
/* cik_cp_load_microcode - load gfx then compute CP ucode.
 * Returns 0 on success, error code from either loader on failure. */
4006 static int cik_cp_load_microcode(struct radeon_device *rdev)
4010 r = cik_cp_gfx_load_microcode(rdev);
4013 r = cik_cp_compute_load_microcode(rdev);
/* cik_cp_fini - tear down both the gfx and compute command processors. */
4020 static void cik_cp_fini(struct radeon_device *rdev)
4022 cik_cp_gfx_fini(rdev);
4023 cik_cp_compute_fini(rdev);
/* cik_cp_resume - bring up the CPs: load ucode, then resume gfx and compute
 * rings.  GUI idle interrupts are masked across the sequence to avoid
 * spurious events while the CPs are in flux.
 * Returns 0 on success, error for failure. */
4026 static int cik_cp_resume(struct radeon_device *rdev)
4030 cik_enable_gui_idle_interrupt(rdev, false);
4032 r = cik_cp_load_microcode(rdev);
4036 r = cik_cp_gfx_resume(rdev);
4039 r = cik_cp_compute_resume(rdev);
4043 cik_enable_gui_idle_interrupt(rdev, true);
/* cik_print_gpu_status_regs - dump the GRBM/SRBM/SDMA/CP status registers to
 * the kernel log.  Diagnostic aid used around GPU soft resets; no side
 * effects beyond the register reads. */
4048 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4050 dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4051 RREG32(GRBM_STATUS));
4052 dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4053 RREG32(GRBM_STATUS2));
4054 dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4055 RREG32(GRBM_STATUS_SE0));
4056 dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4057 RREG32(GRBM_STATUS_SE1));
4058 dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4059 RREG32(GRBM_STATUS_SE2));
4060 dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4061 RREG32(GRBM_STATUS_SE3));
4062 dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4063 RREG32(SRBM_STATUS));
4064 dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4065 RREG32(SRBM_STATUS2));
4066 dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4067 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4068 dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4069 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4070 dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4071 dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4072 RREG32(CP_STALLED_STAT1));
4073 dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4074 RREG32(CP_STALLED_STAT2));
4075 dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4076 RREG32(CP_STALLED_STAT3));
4077 dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4078 RREG32(CP_CPF_BUSY_STAT));
4079 dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4080 RREG32(CP_CPF_STALLED_STAT1));
4081 dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4082 dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4083 dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4084 RREG32(CP_CPC_STALLED_STAT1));
4085 dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4089 * cik_gpu_check_soft_reset - check which blocks are busy
4091 * @rdev: radeon_device pointer
4093 * Check which blocks are busy and return the relevant reset
4094 * mask to be used by cik_gpu_soft_reset().
4095 * Returns a mask of the blocks to be reset.
4097 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
/* GRBM_STATUS: any busy gfx pipeline stage -> reset the gfx block */
4103 tmp = RREG32(GRBM_STATUS);
4104 if (tmp & (PA_BUSY | SC_BUSY |
4105 BCI_BUSY | SX_BUSY |
4106 TA_BUSY | VGT_BUSY |
4108 GDS_BUSY | SPI_BUSY |
4109 IA_BUSY | IA_BUSY_NO_DMA))
4110 reset_mask |= RADEON_RESET_GFX;
4112 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4113 reset_mask |= RADEON_RESET_CP;
4116 tmp = RREG32(GRBM_STATUS2);
4118 reset_mask |= RADEON_RESET_RLC;
4120 /* SDMA0_STATUS_REG */
4121 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4122 if (!(tmp & SDMA_IDLE))
4123 reset_mask |= RADEON_RESET_DMA;
4125 /* SDMA1_STATUS_REG */
4126 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4127 if (!(tmp & SDMA_IDLE))
4128 reset_mask |= RADEON_RESET_DMA1;
4131 tmp = RREG32(SRBM_STATUS2);
4132 if (tmp & SDMA_BUSY)
4133 reset_mask |= RADEON_RESET_DMA;
4135 if (tmp & SDMA1_BUSY)
4136 reset_mask |= RADEON_RESET_DMA1;
/* SRBM_STATUS: IH / semaphore / GRBM-request / VM / MC busy bits */
4139 tmp = RREG32(SRBM_STATUS);
4142 reset_mask |= RADEON_RESET_IH;
4145 reset_mask |= RADEON_RESET_SEM;
4147 if (tmp & GRBM_RQ_PENDING)
4148 reset_mask |= RADEON_RESET_GRBM;
4151 reset_mask |= RADEON_RESET_VMC;
4153 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4154 MCC_BUSY | MCD_BUSY))
4155 reset_mask |= RADEON_RESET_MC;
4157 if (evergreen_is_display_hung(rdev))
4158 reset_mask |= RADEON_RESET_DISPLAY;
4160 /* Skip MC reset as it's mostly likely not hung, just busy */
4161 if (reset_mask & RADEON_RESET_MC) {
4162 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4163 reset_mask &= ~RADEON_RESET_MC;
4170 * cik_gpu_soft_reset - soft reset GPU
4172 * @rdev: radeon_device pointer
4173 * @reset_mask: mask of which blocks to reset
4175 * Soft reset the blocks specified in @reset_mask.
4177 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4179 struct evergreen_mc_save save;
4180 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4183 if (reset_mask == 0)
4186 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
/* log full status + last VM fault info before resetting, for debugging */
4188 cik_print_gpu_status_regs(rdev);
4189 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4190 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4191 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4192 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4201 /* Disable GFX parsing/prefetching */
4202 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4204 /* Disable MEC parsing/prefetching */
4205 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4207 if (reset_mask & RADEON_RESET_DMA) {
4209 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4211 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4213 if (reset_mask & RADEON_RESET_DMA1) {
4215 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4217 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
/* quiesce the memory controller before pulling reset lines */
4220 evergreen_mc_stop(rdev, &save);
4221 if (evergreen_mc_wait_for_idle(rdev)) {
4222 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
/* translate the logical reset mask into GRBM/SRBM soft-reset bits */
4225 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4226 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4228 if (reset_mask & RADEON_RESET_CP) {
4229 grbm_soft_reset |= SOFT_RESET_CP;
4231 srbm_soft_reset |= SOFT_RESET_GRBM;
4234 if (reset_mask & RADEON_RESET_DMA)
4235 srbm_soft_reset |= SOFT_RESET_SDMA;
4237 if (reset_mask & RADEON_RESET_DMA1)
4238 srbm_soft_reset |= SOFT_RESET_SDMA1;
4240 if (reset_mask & RADEON_RESET_DISPLAY)
4241 srbm_soft_reset |= SOFT_RESET_DC;
4243 if (reset_mask & RADEON_RESET_RLC)
4244 grbm_soft_reset |= SOFT_RESET_RLC;
4246 if (reset_mask & RADEON_RESET_SEM)
4247 srbm_soft_reset |= SOFT_RESET_SEM;
4249 if (reset_mask & RADEON_RESET_IH)
4250 srbm_soft_reset |= SOFT_RESET_IH;
4252 if (reset_mask & RADEON_RESET_GRBM)
4253 srbm_soft_reset |= SOFT_RESET_GRBM;
4255 if (reset_mask & RADEON_RESET_VMC)
4256 srbm_soft_reset |= SOFT_RESET_VMC;
/* never soft-reset the MC on IGPs (carveout memory is shared with the CPU) */
4258 if (!(rdev->flags & RADEON_IS_IGP)) {
4259 if (reset_mask & RADEON_RESET_MC)
4260 srbm_soft_reset |= SOFT_RESET_MC;
/* pulse GRBM reset: assert, read back to post the write, then deassert */
4263 if (grbm_soft_reset) {
4264 tmp = RREG32(GRBM_SOFT_RESET);
4265 tmp |= grbm_soft_reset;
4266 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4267 WREG32(GRBM_SOFT_RESET, tmp);
4268 tmp = RREG32(GRBM_SOFT_RESET);
4272 tmp &= ~grbm_soft_reset;
4273 WREG32(GRBM_SOFT_RESET, tmp);
4274 tmp = RREG32(GRBM_SOFT_RESET);
/* same assert/deassert pulse for the SRBM-side blocks */
4277 if (srbm_soft_reset) {
4278 tmp = RREG32(SRBM_SOFT_RESET);
4279 tmp |= srbm_soft_reset;
4280 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4281 WREG32(SRBM_SOFT_RESET, tmp);
4282 tmp = RREG32(SRBM_SOFT_RESET);
4286 tmp &= ~srbm_soft_reset;
4287 WREG32(SRBM_SOFT_RESET, tmp);
4288 tmp = RREG32(SRBM_SOFT_RESET);
4291 /* Wait a little for things to settle down */
4294 evergreen_mc_resume(rdev, &save);
4297 cik_print_gpu_status_regs(rdev);
4301 * cik_asic_reset - soft reset GPU
4303 * @rdev: radeon_device pointer
4305 * Look up which blocks are hung and attempt
4307 * Returns 0 for success.
4309 int cik_asic_reset(struct radeon_device *rdev)
4313 reset_mask = cik_gpu_check_soft_reset(rdev)
/* mark the engine hung in the BIOS scratch regs while the reset runs */
4316 r600_set_bios_scratch_engine_hung(rdev, true);
4318 cik_gpu_soft_reset(rdev, reset_mask);
/* re-check: a clean mask after reset means recovery succeeded */
4320 reset_mask = cik_gpu_check_soft_reset(rdev);
4323 r600_set_bios_scratch_engine_hung(rdev, false);
4329 * cik_gfx_is_lockup - check if the 3D engine is locked up
4331 * @rdev: radeon_device pointer
4332 * @ring: radeon_ring structure holding ring information
4334 * Check if the 3D engine is locked up (CIK).
4335 * Returns true if the engine is locked, false if not.
4337 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4339 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
/* no gfx/compute/CP busy bits set -> engine is idle, not locked up */
4341 if (!(reset_mask & (RADEON_RESET_GFX |
4342 RADEON_RESET_COMPUTE |
4343 RADEON_RESET_CP))) {
4344 radeon_ring_lockup_update(ring);
4347 /* force CP activities */
4348 radeon_ring_force_activity(rdev, ring);
4349 return radeon_ring_test_lockup(rdev, ring);
4354 * cik_mc_program - program the GPU memory controller
4356 * @rdev: radeon_device pointer
4358 * Set the location of vram, gart, and AGP in the GPU's
4359 * physical address space (CIK).
4361 static void cik_mc_program(struct radeon_device *rdev)
4363 struct evergreen_mc_save save;
4367 /* Initialize HDP */
4368 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4369 WREG32((0x2c14 + j), 0x00000000);
4370 WREG32((0x2c18 + j), 0x00000000);
4371 WREG32((0x2c1c + j), 0x00000000);
4372 WREG32((0x2c20 + j), 0x00000000);
4373 WREG32((0x2c24 + j), 0x00000000);
4375 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
/* MC must be idle before the aperture registers are reprogrammed */
4377 evergreen_mc_stop(rdev, &save);
4378 if (radeon_mc_wait_for_idle(rdev)) {
4379 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4381 /* Lockout access through VGA aperture*/
4382 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4383 /* Update configuration */
4384 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4385 rdev->mc.vram_start >> 12);
4386 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4387 rdev->mc.vram_end >> 12);
/* out-of-range accesses are redirected to the vram scratch page */
4388 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4389 rdev->vram_scratch.gpu_addr >> 12);
/* FB_LOCATION packs vram end (hi 16 bits) and start (lo 16), 16MB units */
4390 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4391 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4392 WREG32(MC_VM_FB_LOCATION, tmp);
4393 /* XXX double check these! */
4394 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4395 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4396 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
/* AGP is unused on CIK: program an empty (bot > top) aperture */
4397 WREG32(MC_VM_AGP_BASE, 0);
4398 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4399 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4400 if (radeon_mc_wait_for_idle(rdev)) {
4401 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4403 evergreen_mc_resume(rdev, &save);
4404 /* we need to own VRAM, so turn off the VGA renderer here
4405 * to stop it overwriting our objects */
4406 rv515_vga_render_disable(rdev);
4410 * cik_mc_init - initialize the memory controller driver params
4412 * @rdev: radeon_device pointer
4414 * Look up the amount of vram, vram width, and decide how to place
4415 * vram and gart within the GPU's physical address space (CIK).
4416 * Returns 0 for success.
4418 static int cik_mc_init(struct radeon_device *rdev)
4421 int chansize, numchan;
4423 /* Get VRAM informations */
4424 rdev->mc.vram_is_ddr = true;
/* channel size (bits) from the RAM config register */
4425 tmp = RREG32(MC_ARB_RAMCFG);
4426 if (tmp & CHANSIZE_MASK) {
/* number of memory channels from MC_SHARED_CHMAP */
4431 tmp = RREG32(MC_SHARED_CHMAP);
4432 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4462 rdev->mc.vram_width = numchan * chansize;
4463 /* Could aper size report 0 ? */
4464 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4465 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4466 /* size in MB on si */
4467 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4468 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4469 rdev->mc.visible_vram_size = rdev->mc.aper_size;
/* CIK reuses the SI vram/gtt placement logic */
4470 si_vram_gtt_location(rdev, &rdev->mc);
4471 radeon_update_bandwidth_info(rdev);
4478 * VMID 0 is the physical GPU addresses as used by the kernel.
4479 * VMIDs 1-15 are used for userspace clients and are handled
4480 * by the radeon vm/hsa code.
4483 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4485 * @rdev: radeon_device pointer
4487 * Flush the TLB for the VMID 0 page table (CIK).
4489 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4491 /* flush hdp cache */
4492 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4494 /* bits 0-15 are the VM contexts0-15 */
/* only bit 0 set: invalidate VM context 0 (kernel GART mappings) */
4495 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4499 * cik_pcie_gart_enable - gart enable
4501 * @rdev: radeon_device pointer
4503 * This sets up the TLBs, programs the page tables for VMID0,
4504 * sets up the hw for VMIDs 1-15 which are allocated on
4505 * demand, and sets up the global locations for the LDS, GDS,
4506 * and GPUVM for FSA64 clients (CIK).
4507 * Returns 0 for success, errors for failure.
4509 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4513 if (rdev->gart.robj == NULL) {
4514 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4517 r = radeon_gart_table_vram_pin(rdev);
4520 radeon_gart_restore(rdev);
4521 /* Setup TLB control */
4522 WREG32(MC_VM_MX_L1_TLB_CNTL,
4525 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4526 ENABLE_ADVANCED_DRIVER_MODEL |
4527 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4528 /* Setup L2 cache */
4529 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4530 ENABLE_L2_FRAGMENT_PROCESSING |
4531 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4532 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4533 EFFECTIVE_L2_QUEUE_SIZE(7) |
4534 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4535 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4536 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4537 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4538 /* setup context0 */
/* context0 (VMID 0) covers the GART range with a flat page table */
4539 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4540 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4541 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4542 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4543 (u32)(rdev->dummy_page.addr >> 12));
4544 WREG32(VM_CONTEXT0_CNTL2, 0);
4545 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4546 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4552 /* empty context1-15 */
4553 /* FIXME start with 4G, once using 2 level pt switch to full
4556 /* set vm size, must be a multiple of 4 */
4557 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4558 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
/* contexts 1-7 and 8-15 use two separate register banks */
4559 for (i = 1; i < 16; i++) {
4561 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4562 rdev->gart.table_addr >> 12);
4564 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4565 rdev->gart.table_addr >> 12);
4568 /* enable context1-15 */
4569 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4570 (u32)(rdev->dummy_page.addr >> 12));
4571 WREG32(VM_CONTEXT1_CNTL2, 4);
/* enable faults + interrupts for all protection violation types */
4572 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4573 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4574 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4575 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4576 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4577 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4578 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4579 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4580 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4581 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4582 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4583 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4584 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4586 /* TC cache setup ??? */
4587 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4588 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4589 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4591 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4592 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4593 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4594 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4595 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4597 WREG32(TC_CFG_L1_VOLATILE, 0);
4598 WREG32(TC_CFG_L2_VOLATILE, 0);
4600 if (rdev->family == CHIP_KAVERI) {
4601 u32 tmp = RREG32(CHUB_CONTROL);
4603 WREG32(CHUB_CONTROL, tmp);
4606 /* XXX SH_MEM regs */
4607 /* where to put LDS, scratch, GPUVM in FSA64 space */
/* program SH_MEM/SDMA defaults for every VMID via SRBM (hence the lock) */
4608 mutex_lock(&rdev->srbm_mutex);
4609 for (i = 0; i < 16; i++) {
4610 cik_srbm_select(rdev, 0, 0, 0, i);
4611 /* CP and shaders */
4612 WREG32(SH_MEM_CONFIG, 0);
4613 WREG32(SH_MEM_APE1_BASE, 1);
4614 WREG32(SH_MEM_APE1_LIMIT, 0);
4615 WREG32(SH_MEM_BASES, 0);
4617 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4618 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4619 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4620 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4621 /* XXX SDMA RLC - todo */
4623 cik_srbm_select(rdev, 0, 0, 0, 0);
4624 mutex_unlock(&rdev->srbm_mutex);
4626 cik_pcie_gart_tlb_flush(rdev);
4627 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4628 (unsigned)(rdev->mc.gtt_size >> 20),
4629 (unsigned long long)rdev->gart.table_addr);
4630 rdev->gart.ready = true;
4635 * cik_pcie_gart_disable - gart disable
4637 * @rdev: radeon_device pointer
4639 * This disables all VM page table (CIK).
4641 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4643 /* Disable all tables */
4644 WREG32(VM_CONTEXT0_CNTL, 0);
4645 WREG32(VM_CONTEXT1_CNTL, 0);
4646 /* Setup TLB control */
4647 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4648 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4649 /* Setup L2 cache */
/* L2 kept configured (minus ENABLE_L2_CACHE) so MC accesses still work */
4651 ENABLE_L2_FRAGMENT_PROCESSING |
4652 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4653 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4654 EFFECTIVE_L2_QUEUE_SIZE(7) |
4655 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4656 WREG32(VM_L2_CNTL2, 0);
4657 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4658 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4659 radeon_gart_table_vram_unpin(rdev);
4663 * cik_pcie_gart_fini - vm fini callback
4665 * @rdev: radeon_device pointer
4667 * Tears down the driver GART/VM setup (CIK).
4669 static void cik_pcie_gart_fini(struct radeon_device *rdev)
/* disable first, then release the table bo and the gart bookkeeping */
4671 cik_pcie_gart_disable(rdev);
4672 radeon_gart_table_vram_free(rdev);
4673 radeon_gart_fini(rdev);
4678 * cik_ib_parse - vm ib_parse callback
4680 * @rdev: radeon_device pointer
4681 * @ib: indirect buffer pointer
4683 * CIK uses hw IB checking so this is a nop (CIK).
4685 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4692 * VMID 0 is the physical GPU addresses as used by the kernel.
4693 * VMIDs 1-15 are used for userspace clients and are handled
4694 * by the radeon vm/hsa code.
4697 * cik_vm_init - cik vm init callback
4699 * @rdev: radeon_device pointer
4701 * Inits cik specific vm parameters (number of VMs, base of vram for
4702 * VMIDs 1-15) (CIK).
4703 * Returns 0 for success.
4705 int cik_vm_init(struct radeon_device *rdev)
4708 rdev->vm_manager.nvm = 16;
4709 /* base offset of vram pages */
/* IGPs carve vram out of system memory; read its offset from the MC */
4710 if (rdev->flags & RADEON_IS_IGP) {
4711 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4713 rdev->vm_manager.vram_base_offset = tmp;
4715 rdev->vm_manager.vram_base_offset = 0;
4721 * cik_vm_fini - cik vm fini callback
4723 * @rdev: radeon_device pointer
4725 * Tear down any asic specific VM setup (CIK).
4727 void cik_vm_fini(struct radeon_device *rdev)
4732 * cik_vm_decode_fault - print human readable fault info
4734 * @rdev: radeon_device pointer
4735 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4736 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4738 * Print human readable fault information (CIK).
4740 static void cik_vm_decode_fault(struct radeon_device *rdev,
4741 u32 status, u32 addr, u32 mc_client)
/* decode the packed fields of the fault status register */
4743 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4744 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4745 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
/* mc_client is a 4-char ASCII tag packed big-endian into a u32 */
4746 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
4747 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
4749 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
4750 protections, vmid, addr,
4751 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4752 block, mc_client, mc_id);
4756 * cik_vm_flush - cik vm flush using the CP
4758 * @rdev: radeon_device pointer
4760 * Update the page table base and flush the VM TLB
4761 * using the CP (CIK).
4763 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4765 struct radeon_ring *ring = &rdev->ring[ridx];
4770 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4771 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4772 WRITE_DATA_DST_SEL(0)));
/* NOTE(review): VMIDs 0-7 and 8-15 live in two separate register ranges;
 * the selecting conditional (presumably vm->id < 8) is elided in this
 * extract — confirm against the full source */
4774 radeon_ring_write(ring,
4775 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4777 radeon_ring_write(ring,
4778 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4780 radeon_ring_write(ring, 0);
/* page directory base, in units of 4k pages */
4781 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4783 /* update SH_MEM_* regs */
/* select this VMID's SRBM state bank before touching SH_MEM_* */
4784 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4785 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4786 WRITE_DATA_DST_SEL(0)));
4787 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4788 radeon_ring_write(ring, 0);
4789 radeon_ring_write(ring, VMID(vm->id));
4791 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4792 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4793 WRITE_DATA_DST_SEL(0)));
4794 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4795 radeon_ring_write(ring, 0);
4797 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4798 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4799 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4800 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
/* switch the SRBM bank back to VMID 0 */
4802 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4803 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4804 WRITE_DATA_DST_SEL(0)));
4805 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4806 radeon_ring_write(ring, 0);
4807 radeon_ring_write(ring, VMID(0));
4810 /* We should be using the WAIT_REG_MEM packet here like in
4811 * cik_fence_ring_emit(), but it causes the CP to hang in this
4814 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4815 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4816 WRITE_DATA_DST_SEL(0)));
4817 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4818 radeon_ring_write(ring, 0);
4819 radeon_ring_write(ring, 0);
4821 /* bits 0-15 are the VM contexts0-15 */
4822 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4823 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4824 WRITE_DATA_DST_SEL(0)));
4825 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4826 radeon_ring_write(ring, 0);
4827 radeon_ring_write(ring, 1 << vm->id);
4829 /* compute doesn't have PFP */
4830 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4831 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4832 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4833 radeon_ring_write(ring, 0x0);
4838 * cik_vm_set_page - update the page tables using sDMA
4840 * @rdev: radeon_device pointer
4841 * @ib: indirect buffer to fill with commands
4842 * @pe: addr of the page entry
4843 * @addr: dst addr to write into pe
4844 * @count: number of page entries to update
4845 * @incr: increase next addr by incr bytes
4846 * @flags: access flags
4848 * Update the page tables using CP or sDMA (CIK).
4850 void cik_vm_set_page(struct radeon_device *rdev,
4851 struct radeon_ib *ib,
4853 uint64_t addr, unsigned count,
4854 uint32_t incr, uint32_t flags)
4856 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
/* CP path when the GFX ring owns page-table updates; otherwise the
 * work is handed to sDMA at the bottom of the function */
4860 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
/* 2 header dwords (dst addr lo/hi) + 2 dwords per 64-bit PTE */
4863 ndw = 2 + count * 2;
4867 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4868 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4869 WRITE_DATA_DST_SEL(1));
4870 ib->ptr[ib->length_dw++] = pe;
4871 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4872 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4873 if (flags & RADEON_VM_PAGE_SYSTEM) {
/* system pages go through the GART; mask to a 4k-aligned address */
4874 value = radeon_vm_map_gart(rdev, addr);
4875 value &= 0xFFFFFFFFFFFFF000ULL;
4876 } else if (flags & RADEON_VM_PAGE_VALID) {
4882 value |= r600_flags;
4883 ib->ptr[ib->length_dw++] = value;
4884 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4889 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4895 * The RLC is a multi-purpose microengine that handles a
4896 * variety of functions, the most important of which is
4897 * the interrupt controller.
/* Gate the GUI-idle (context busy/empty) interrupts on the gfx ring */
4899 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4902 u32 tmp = RREG32(CP_INT_CNTL_RING0);
4905 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4907 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4908 WREG32(CP_INT_CNTL_RING0, tmp);
/* Toggle RLC load balancing (LBPW) */
4911 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4915 tmp = RREG32(RLC_LB_CNTL);
4917 tmp |= LOAD_BALANCE_ENABLE;
4919 tmp &= ~LOAD_BALANCE_ENABLE;
4920 WREG32(RLC_LB_CNTL, tmp);
/* Poll until the RLC serdes masters report idle: first the per-SE/SH
 * CU masters, then the non-CU masters; each poll is bounded by
 * rdev->usec_timeout iterations */
4923 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4928 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4929 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4930 cik_select_se_sh(rdev, i, j);
4931 for (k = 0; k < rdev->usec_timeout; k++) {
4932 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
/* restore broadcast to all SEs/SHs */
4938 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4940 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4941 for (k = 0; k < rdev->usec_timeout; k++) {
4942 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/* Write a saved RLC_CNTL value back (companion to cik_halt_rlc()) */
4948 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4952 tmp = RREG32(RLC_CNTL);
4954 WREG32(RLC_CNTL, rlc);
/* Halt the RLC (if running) and wait for GPM and the serdes masters to
 * go idle.  Returns the previous RLC_CNTL so the caller can restore it
 * via cik_update_rlc(). */
4957 static u32 cik_halt_rlc(struct radeon_device *rdev)
4961 orig = data = RREG32(RLC_CNTL);
4963 if (data & RLC_ENABLE) {
4966 data &= ~RLC_ENABLE;
4967 WREG32(RLC_CNTL, data);
4969 for (i = 0; i < rdev->usec_timeout; i++) {
4970 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4975 cik_wait_for_rlc_serdes(rdev);
/* Request RLC safe mode and wait for the acknowledgement: gfx must
 * report powered+clocked and the REQ bit must clear */
4981 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
4985 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
4986 WREG32(RLC_GPR_REG2, tmp);
4988 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
4989 for (i = 0; i < rdev->usec_timeout; i++) {
4990 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
4995 for (i = 0; i < rdev->usec_timeout; i++) {
4996 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
/* Request RLC exit from safe mode (no wait for completion here) */
5002 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5006 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5007 WREG32(RLC_GPR_REG2, tmp);
5011 * cik_rlc_stop - stop the RLC ME
5013 * @rdev: radeon_device pointer
5015 * Halt the RLC ME (MicroEngine) (CIK).
5017 static void cik_rlc_stop(struct radeon_device *rdev)
5019 WREG32(RLC_CNTL, 0);
5021 cik_enable_gui_idle_interrupt(rdev, false);
5023 cik_wait_for_rlc_serdes(rdev);
5027 * cik_rlc_start - start the RLC ME
5029 * @rdev: radeon_device pointer
5031 * Unhalt the RLC ME (MicroEngine) (CIK).
5033 static void cik_rlc_start(struct radeon_device *rdev)
5035 WREG32(RLC_CNTL, RLC_ENABLE);
5037 cik_enable_gui_idle_interrupt(rdev, true);
5043 * cik_rlc_resume - setup the RLC hw
5045 * @rdev: radeon_device pointer
5047 * Initialize the RLC registers, load the ucode,
5048 * and start the RLC (CIK).
5049 * Returns 0 for success, -EINVAL if the ucode is not available.
5051 static int cik_rlc_resume(struct radeon_device *rdev)
5054 const __be32 *fw_data;
/* per-family RLC ucode sizes (in dwords) */
5059 switch (rdev->family) {
5062 size = BONAIRE_RLC_UCODE_SIZE;
5065 size = KV_RLC_UCODE_SIZE;
5068 size = KB_RLC_UCODE_SIZE;
/* clear the low two bits of RLC_CGCG_CGLS_CTRL (CGCG/CGLS off) while
 * (re)loading the RLC microcode */
5075 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5076 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5084 WREG32(RLC_LB_CNTR_INIT, 0);
5085 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
/* broadcast the load-balance setup to all SEs/SHs */
5087 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5088 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5089 WREG32(RLC_LB_PARAMS, 0x00600408);
5090 WREG32(RLC_LB_CNTL, 0x80000004);
5092 WREG32(RLC_MC_CNTL, 0);
5093 WREG32(RLC_UCODE_CNTL, 0);
/* the RLC firmware image is big-endian; byteswap as it is streamed in */
5095 fw_data = (const __be32 *)rdev->rlc_fw->data;
5096 WREG32(RLC_GPM_UCODE_ADDR, 0);
5097 for (i = 0; i < size; i++)
5098 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5099 WREG32(RLC_GPM_UCODE_ADDR, 0);
5101 /* XXX - find out what chips support lbpw */
5102 cik_enable_lbpw(rdev, false);
5104 if (rdev->family == CHIP_BONAIRE)
5105 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5107 cik_rlc_start(rdev);
/* Enable/disable gfx coarse-grain clock gating (CGCG) and CGLS */
5112 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5114 u32 data, orig, tmp, tmp2;
5116 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5118 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5119 cik_enable_gui_idle_interrupt(rdev, true);
/* halt the RLC, program the serdes write masters, then restore it */
5121 tmp = cik_halt_rlc(rdev);
5123 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5124 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5125 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5126 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5127 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5129 cik_update_rlc(rdev, tmp);
5131 data |= CGCG_EN | CGLS_EN;
5133 cik_enable_gui_idle_interrupt(rdev, false);
/* four back-to-back reads, apparently to flush/settle the clock
 * switch; NOTE(review): the source gives no rationale for the count */
5135 RREG32(CB_CGTT_SCLK_CTRL);
5136 RREG32(CB_CGTT_SCLK_CTRL);
5137 RREG32(CB_CGTT_SCLK_CTRL);
5138 RREG32(CB_CGTT_SCLK_CTRL);
5140 data &= ~(CGCG_EN | CGLS_EN);
5144 WREG32(RLC_CGCG_CGLS_CTRL, data);
/* Enable/disable gfx medium-grain clock gating plus the related
 * MGLS/CP light-sleep and CGTS override state */
5148 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5150 u32 data, orig, tmp = 0;
5152 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5153 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5154 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5155 orig = data = RREG32(CP_MEM_SLP_CNTL);
5156 data |= CP_MEM_LS_EN;
5158 WREG32(CP_MEM_SLP_CNTL, data);
5162 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5165 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5167 tmp = cik_halt_rlc(rdev);
5169 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5170 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5171 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5172 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5173 WREG32(RLC_SERDES_WR_CTRL, data);
5175 cik_update_rlc(rdev, tmp);
5177 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5178 orig = data = RREG32(CGTS_SM_CTRL_REG);
5179 data &= ~SM_MODE_MASK;
5180 data |= SM_MODE(0x2);
5181 data |= SM_MODE_ENABLE;
5182 data &= ~CGTS_OVERRIDE;
5183 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5184 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5185 data &= ~CGTS_LS_OVERRIDE;
5186 data &= ~ON_MONITOR_ADD_MASK;
5187 data |= ON_MONITOR_ADD_EN;
5188 data |= ON_MONITOR_ADD(0x96);
5190 WREG32(CGTS_SM_CTRL_REG, data);
/* disable path: force overrides on, take memories out of light sleep */
5193 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5196 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5198 data = RREG32(RLC_MEM_SLP_CNTL);
5199 if (data & RLC_MEM_LS_EN) {
5200 data &= ~RLC_MEM_LS_EN;
5201 WREG32(RLC_MEM_SLP_CNTL, data);
5204 data = RREG32(CP_MEM_SLP_CNTL);
5205 if (data & CP_MEM_LS_EN) {
5206 data &= ~CP_MEM_LS_EN;
5207 WREG32(CP_MEM_SLP_CNTL, data);
5210 orig = data = RREG32(CGTS_SM_CTRL_REG);
5211 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5213 WREG32(CGTS_SM_CTRL_REG, data);
5215 tmp = cik_halt_rlc(rdev);
5217 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5218 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5219 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5220 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5221 WREG32(RLC_SERDES_WR_CTRL, data);
5223 cik_update_rlc(rdev, tmp);
/* MC clock-gating register list shared by the LS/MGCG helpers below;
 * NOTE(review): the initializer entries are elided in this extract */
5227 static const u32 mc_cg_registers[] =
/* Toggle MC light sleep across every MC clock-gating register */
5240 static void cik_enable_mc_ls(struct radeon_device *rdev,
5246 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5247 orig = data = RREG32(mc_cg_registers[i]);
5248 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5249 data |= MC_LS_ENABLE;
5251 data &= ~MC_LS_ENABLE;
5253 WREG32(mc_cg_registers[i], data);
/* Toggle MC medium-grain clock gating across the same register set */
5257 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5263 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5264 orig = data = RREG32(mc_cg_registers[i]);
5265 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5266 data |= MC_CG_ENABLE;
5268 data &= ~MC_CG_ENABLE;
5270 WREG32(mc_cg_registers[i], data);
/* sDMA MGCG: writing 0x100 to SDMA0_CLK_CTRL enables gating; applied
 * to both sDMA engine register banks */
5274 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5279 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5280 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5281 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5283 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5286 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5288 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5291 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
/* sDMA memory light sleep, both engines */
5295 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5300 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5301 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5304 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5306 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5309 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5311 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5314 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5316 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5319 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
/* UVD MGCG via the indirect UVD context registers and UVD_CGC_CTRL */
5323 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5328 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5329 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5331 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5333 orig = data = RREG32(UVD_CGC_CTRL);
5336 WREG32(UVD_CGC_CTRL, data);
5338 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5340 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5342 orig = data = RREG32(UVD_CGC_CTRL);
5345 WREG32(UVD_CGC_CTRL, data);
/* BIF (bus interface) memory light sleep via PCIE_CNTL2 */
5349 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5354 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5356 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5357 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5358 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5360 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5361 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5364 WREG32_PCIE_PORT(PCIE_CNTL2, data);
/* HDP MGCG; the hardware bit is a *disable*, so the sense is inverted */
5367 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5372 orig = data = RREG32(HDP_HOST_PATH_CNTL);
5374 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5375 data &= ~CLOCK_GATING_DIS;
5377 data |= CLOCK_GATING_DIS;
5380 WREG32(HDP_HOST_PATH_CNTL, data);
/* HDP memory light sleep */
5383 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5388 orig = data = RREG32(HDP_MEM_POWER_LS);
5390 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5391 data |= HDP_LS_ENABLE;
5393 data &= ~HDP_LS_ENABLE;
5396 WREG32(HDP_MEM_POWER_LS, data);
/* Dispatch clockgating enable/disable to each requested IP block */
5399 void cik_update_cg(struct radeon_device *rdev,
5400 u32 block, bool enable)
5403 if (block & RADEON_CG_BLOCK_GFX) {
5404 cik_enable_gui_idle_interrupt(rdev, false);
5405 /* order matters! */
/* enable: MGCG before CGCG; disable: CGCG before MGCG */
5407 cik_enable_mgcg(rdev, true);
5408 cik_enable_cgcg(rdev, true);
5410 cik_enable_cgcg(rdev, false);
5411 cik_enable_mgcg(rdev, false);
5413 cik_enable_gui_idle_interrupt(rdev, true);
5416 if (block & RADEON_CG_BLOCK_MC) {
/* APUs (IGP) skip the discrete MC clockgating path */
5417 if (!(rdev->flags & RADEON_IS_IGP)) {
5418 cik_enable_mc_mgcg(rdev, enable);
5419 cik_enable_mc_ls(rdev, enable);
5423 if (block & RADEON_CG_BLOCK_SDMA) {
5424 cik_enable_sdma_mgcg(rdev, enable);
5425 cik_enable_sdma_mgls(rdev, enable);
5428 if (block & RADEON_CG_BLOCK_BIF) {
5429 cik_enable_bif_mgls(rdev, enable);
5432 if (block & RADEON_CG_BLOCK_UVD) {
5434 cik_enable_uvd_mgcg(rdev, enable);
5437 if (block & RADEON_CG_BLOCK_HDP) {
5438 cik_enable_hdp_mgcg(rdev, enable);
5439 cik_enable_hdp_ls(rdev, enable);
/* Bring up clockgating at init: GFX first, then the shared blocks */
5443 static void cik_init_cg(struct radeon_device *rdev)
5446 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5449 si_init_uvd_internal_cg(rdev);
5451 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5452 RADEON_CG_BLOCK_SDMA |
5453 RADEON_CG_BLOCK_BIF |
5454 RADEON_CG_BLOCK_UVD |
5455 RADEON_CG_BLOCK_HDP), true);
/* Teardown mirrors cik_init_cg() in reverse order */
5458 static void cik_fini_cg(struct radeon_device *rdev)
5460 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5461 RADEON_CG_BLOCK_SDMA |
5462 RADEON_CG_BLOCK_BIF |
5463 RADEON_CG_BLOCK_UVD |
5464 RADEON_CG_BLOCK_HDP), false);
5466 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
/* SMU clock slowdown on power-up */
5469 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5474 orig = data = RREG32(RLC_PG_CNTL);
5475 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5476 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5478 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5480 WREG32(RLC_PG_CNTL, data);
/* SMU clock slowdown on power-down */
5483 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5488 orig = data = RREG32(RLC_PG_CNTL);
5489 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5490 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5492 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5494 WREG32(RLC_PG_CNTL, data);
/* CP powergating; the hardware bit is a *disable*, so sense is inverted */
5497 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5501 orig = data = RREG32(RLC_PG_CNTL);
5502 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5503 data &= ~DISABLE_CP_PG;
5505 data |= DISABLE_CP_PG;
5507 WREG32(RLC_PG_CNTL, data);
/* GDS powergating; also an inverted (disable) bit */
5510 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5514 orig = data = RREG32(RLC_PG_CNTL);
5515 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5516 data &= ~DISABLE_GDS_PG;
5518 data |= DISABLE_GDS_PG;
5520 WREG32(RLC_PG_CNTL, data);
/* Layout of the CP powergating save/restore table (dword offsets) */
5523 #define CP_ME_TABLE_SIZE 96
5524 #define CP_ME_TABLE_OFFSET 2048
5525 #define CP_MEC_TABLE_OFFSET 4096
/* Copy the CE/PFP/ME/MEC register tables out of the firmware images
 * into the RLC cp_table buffer so the RLC can restore CP state after
 * powergating. */
5527 void cik_init_cp_pg_table(struct radeon_device *rdev)
5529 const __be32 *fw_data;
5530 volatile u32 *dst_ptr;
5531 int me, i, max_me = 4;
/* NOTE(review): the KAVERI branch body is elided in this extract —
 * presumably it adjusts max_me for the second MEC; confirm against
 * the full source */
5535 if (rdev->family == CHIP_KAVERI)
5538 if (rdev->rlc.cp_table_ptr == NULL)
5541 /* write the cp table buffer */
5542 dst_ptr = rdev->rlc.cp_table_ptr;
/* me 0=CE, 1=PFP, 2=ME (all at CP_ME_TABLE_OFFSET in their images),
 * remaining entries come from the MEC image */
5543 for (me = 0; me < max_me; me++) {
5545 fw_data = (const __be32 *)rdev->ce_fw->data;
5546 table_offset = CP_ME_TABLE_OFFSET;
5547 } else if (me == 1) {
5548 fw_data = (const __be32 *)rdev->pfp_fw->data;
5549 table_offset = CP_ME_TABLE_OFFSET;
5550 } else if (me == 2) {
5551 fw_data = (const __be32 *)rdev->me_fw->data;
5552 table_offset = CP_ME_TABLE_OFFSET;
5554 fw_data = (const __be32 *)rdev->mec_fw->data;
5555 table_offset = CP_MEC_TABLE_OFFSET;
/* firmware tables are big-endian; swap into the CPU-visible buffer */
5558 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
5559 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5561 bo_offset += CP_ME_TABLE_SIZE;
/* Master gfx powergating enable plus RLC auto-powergate control */
5565 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5570 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5571 orig = data = RREG32(RLC_PG_CNTL);
5572 data |= GFX_PG_ENABLE;
5574 WREG32(RLC_PG_CNTL, data);
5576 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5579 WREG32(RLC_AUTO_PG_CTRL, data);
5581 orig = data = RREG32(RLC_PG_CNTL);
5582 data &= ~GFX_PG_ENABLE;
5584 WREG32(RLC_PG_CNTL, data);
5586 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5587 data &= ~AUTO_PG_EN;
5589 WREG32(RLC_AUTO_PG_CTRL, data);
/* read-only access; NOTE(review): purpose not stated in the source */
5591 data = RREG32(DB_RENDER_CONTROL);
/* Return the bitmap of active (not fused-off/disabled) CUs for one
 * SE/SH pair */
5595 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5597 u32 mask = 0, tmp, tmp1;
5600 cik_select_se_sh(rdev, se, sh);
5601 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5602 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5603 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
/* the config registers flag inactive CUs, so invert and mask down to
 * the CU count for this SH */
5610 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5615 return (~tmp) & mask;
/* Count active CUs and program the always-on CU mask and the maximum
 * number of powergated CUs */
5618 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5620 u32 i, j, k, active_cu_number = 0;
5621 u32 mask, counter, cu_bitmap;
5624 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5625 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5629 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5630 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5638 active_cu_number += counter;
/* pack the per-SH bitmap: 16 bits per SE, 8 bits per SH */
5639 tmp |= (cu_bitmap << (i * 16 + j * 8));
5643 WREG32(RLC_PG_AO_CU_MASK, tmp);
5645 tmp = RREG32(RLC_MAX_PG_CU);
5646 tmp &= ~MAX_PU_CU_MASK;
5647 tmp |= MAX_PU_CU(active_cu_number);
5648 WREG32(RLC_MAX_PG_CU, tmp);
/* Static per-CU powergating */
5651 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5656 orig = data = RREG32(RLC_PG_CNTL);
5657 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5658 data |= STATIC_PER_CU_PG_ENABLE;
5660 data &= ~STATIC_PER_CU_PG_ENABLE;
5662 WREG32(RLC_PG_CNTL, data);
/* Dynamic per-CU powergating */
5665 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5670 orig = data = RREG32(RLC_PG_CNTL);
5671 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5672 data |= DYN_PER_CU_PG_ENABLE;
5674 data &= ~DYN_PER_CU_PG_ENABLE;
5676 WREG32(RLC_PG_CNTL, data);
/* RLC GPM scratch offsets for the save/restore list and the
 * clear-state (CSB) descriptor */
5679 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5680 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
/* Program everything gfx powergating needs: clear-state descriptor,
 * save/restore register list, table base addresses and PG timing */
5682 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5687 if (rdev->rlc.cs_data) {
/* descriptor = hi addr, lo addr, size of the clear-state buffer */
5688 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5689 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5690 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
5691 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
/* no clear-state data: write an all-zero descriptor */
5693 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5694 for (i = 0; i < 3; i++)
5695 WREG32(RLC_GPM_SCRATCH_DATA, 0);
5697 if (rdev->rlc.reg_list) {
5698 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5699 for (i = 0; i < rdev->rlc.reg_list_size; i++)
5700 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
5703 orig = data = RREG32(RLC_PG_CNTL);
5706 WREG32(RLC_PG_CNTL, data);
/* table base addresses are in 256-byte units (>> 8) */
5708 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5709 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5711 data = RREG32(CP_RB_WPTR_POLL_CNTL);
5712 data &= ~IDLE_POLL_COUNT_MASK;
5713 data |= IDLE_POLL_COUNT(0x60);
5714 WREG32(CP_RB_WPTR_POLL_CNTL, data);
5717 WREG32(RLC_PG_DELAY, data);
5719 data = RREG32(RLC_PG_DELAY_2);
5722 WREG32(RLC_PG_DELAY_2, data);
5724 data = RREG32(RLC_AUTO_PG_CTRL);
5725 data &= ~GRBM_REG_SGIT_MASK;
5726 data |= GRBM_REG_SGIT(0x700);
5727 WREG32(RLC_AUTO_PG_CTRL, data);
/* Toggle all three gfx powergating flavors together */
5731 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5733 cik_enable_gfx_cgpg(rdev, enable);
5734 cik_enable_gfx_static_mgpg(rdev, enable);
5735 cik_enable_gfx_dynamic_mgpg(rdev, enable);
/* Size (in dwords) of the clear-state buffer emitted by
 * cik_get_csb_buffer(); the two functions must be kept in sync */
5738 u32 cik_get_csb_size(struct radeon_device *rdev)
5741 const struct cs_section_def *sect = NULL;
5742 const struct cs_extent_def *ext = NULL;
5744 if (rdev->rlc.cs_data == NULL)
5747 /* begin clear state */
5749 /* context control state */
5752 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5753 for (ext = sect->section; ext->extent != NULL; ++ext) {
5754 if (sect->id == SECT_CONTEXT)
/* SET_CONTEXT_REG header + reg offset + payload dwords */
5755 count += 2 + ext->reg_count;
5760 /* pa_sc_raster_config/pa_sc_raster_config1 */
5762 /* end clear state */
/* Fill @buffer with the clear-state packet stream described by
 * rdev->rlc.cs_data plus the per-family raster config */
5770 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5773 const struct cs_section_def *sect = NULL;
5774 const struct cs_extent_def *ext = NULL;
5776 if (rdev->rlc.cs_data == NULL)
5781 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5782 buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
5784 buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
5785 buffer[count++] = 0x80000000;
5786 buffer[count++] = 0x80000000;
5788 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5789 for (ext = sect->section; ext->extent != NULL; ++ext) {
5790 if (sect->id == SECT_CONTEXT) {
5791 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
/* context register offsets are relative to 0xa000 */
5792 buffer[count++] = ext->reg_index - 0xa000;
5793 for (i = 0; i < ext->reg_count; i++)
5794 buffer[count++] = ext->extent[i];
/* per-family PA_SC_RASTER_CONFIG values; non-Bonaire entries are
 * still placeholders (marked XXX) in the source */
5801 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
5802 buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
5803 switch (rdev->family) {
5805 buffer[count++] = 0x16000012;
5806 buffer[count++] = 0x00000000;
5809 buffer[count++] = 0x00000000; /* XXX */
5810 buffer[count++] = 0x00000000;
5813 buffer[count++] = 0x00000000; /* XXX */
5814 buffer[count++] = 0x00000000;
5817 buffer[count++] = 0x00000000;
5818 buffer[count++] = 0x00000000;
5822 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5823 buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
5825 buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
5826 buffer[count++] = 0;
/* Bring up powergating according to rdev->pg_flags */
5829 static void cik_init_pg(struct radeon_device *rdev)
5831 if (rdev->pg_flags) {
5832 cik_enable_sck_slowdown_on_pu(rdev, true);
5833 cik_enable_sck_slowdown_on_pd(rdev, true);
5834 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5835 cik_init_gfx_cgpg(rdev);
5836 cik_enable_cp_pg(rdev, true);
5837 cik_enable_gds_pg(rdev, true);
5839 cik_init_ao_cu_mask(rdev);
5840 cik_update_gfx_pg(rdev, true);
/* Teardown mirrors cik_init_pg(): gfx PG off first, then CP/GDS */
5844 static void cik_fini_pg(struct radeon_device *rdev)
5846 if (rdev->pg_flags) {
5847 cik_update_gfx_pg(rdev, false);
5848 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5849 cik_enable_cp_pg(rdev, false);
5850 cik_enable_gds_pg(rdev, false);
5857 * Starting with r6xx, interrupts are handled via a ring buffer.
5858 * Ring buffers are areas of GPU accessible memory that the GPU
5859 * writes interrupt vectors into and the host reads vectors out of.
5860 * There is a rptr (read pointer) that determines where the
5861 * host is currently reading, and a wptr (write pointer)
5862 * which determines where the GPU has written. When the
5863 * pointers are equal, the ring is idle. When the GPU
5864 * writes vectors to the ring buffer, it increments the
5865 * wptr. When there is an interrupt, the host then starts
5866 * fetching commands and processing them until the pointers are
5867 * equal again at which point it updates the rptr.
5871 * cik_enable_interrupts - Enable the interrupt ring buffer
5873 * @rdev: radeon_device pointer
5875 * Enable the interrupt ring buffer (CIK).
5877 static void cik_enable_interrupts(struct radeon_device *rdev)
5879 u32 ih_cntl = RREG32(IH_CNTL);
5880 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5882 ih_cntl |= ENABLE_INTR;
5883 ih_rb_cntl |= IH_RB_ENABLE;
5884 WREG32(IH_CNTL, ih_cntl);
5885 WREG32(IH_RB_CNTL, ih_rb_cntl);
5886 rdev->ih.enabled = true;
5890 * cik_disable_interrupts - Disable the interrupt ring buffer
5892 * @rdev: radeon_device pointer
5894 * Disable the interrupt ring buffer (CIK).
5896 static void cik_disable_interrupts(struct radeon_device *rdev)
5898 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5899 u32 ih_cntl = RREG32(IH_CNTL);
5901 ih_rb_cntl &= ~IH_RB_ENABLE;
5902 ih_cntl &= ~ENABLE_INTR;
5903 WREG32(IH_RB_CNTL, ih_rb_cntl);
5904 WREG32(IH_CNTL, ih_cntl);
5905 /* set rptr, wptr to 0 */
5906 WREG32(IH_RB_RPTR, 0);
5907 WREG32(IH_RB_WPTR, 0);
5908 rdev->ih.enabled = false;
5913 * cik_disable_interrupt_state - Disable all interrupt sources
5915 * @rdev: radeon_device pointer
5917 * Clear all interrupt enable bits used by the driver (CIK).
5919 static void cik_disable_interrupt_state(struct radeon_device *rdev)
/* gfx ring: keep only the context busy/empty bits, drop the rest */
5924 tmp = RREG32(CP_INT_CNTL_RING0) &
5925 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5926 WREG32(CP_INT_CNTL_RING0, tmp);
/* sDMA trap interrupts, both engines */
5928 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5929 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5930 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5931 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5932 /* compute queues */
5933 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5934 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5935 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5936 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5937 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5938 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5939 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5940 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5942 WREG32(GRBM_INT_CNTL, 0);
5943 /* vline/vblank, etc. */
5944 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5945 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5946 if (rdev->num_crtc >= 4) {
5947 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5948 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5950 if (rdev->num_crtc >= 6) {
5951 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5952 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5956 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5958 /* digital hotplug */
/* preserve only each pad's polarity bit while clearing its enables */
5959 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5960 WREG32(DC_HPD1_INT_CONTROL, tmp);
5961 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5962 WREG32(DC_HPD2_INT_CONTROL, tmp);
5963 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5964 WREG32(DC_HPD3_INT_CONTROL, tmp);
5965 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5966 WREG32(DC_HPD4_INT_CONTROL, tmp);
5967 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5968 WREG32(DC_HPD5_INT_CONTROL, tmp);
5969 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5970 WREG32(DC_HPD6_INT_CONTROL, tmp);
5975 * cik_irq_init - init and enable the interrupt ring
5977 * @rdev: radeon_device pointer
5979 * Allocate a ring buffer for the interrupt controller,
5980 * enable the RLC, disable interrupts, enable the IH
5981 * ring buffer and enable it (CIK).
5982 * Called at device load and resume.
5983 * Returns 0 for success, errors for failure.
5985 static int cik_irq_init(struct radeon_device *rdev)
5989 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5992 ret = r600_ih_ring_alloc(rdev);
5997 cik_disable_interrupts(rdev);
/* the RLC must be running before the IH ring is enabled; on failure
 * the just-allocated IH ring is torn down again */
6000 ret = cik_rlc_resume(rdev);
6002 r600_ih_ring_fini(rdev);
6006 /* setup interrupt control */
6007 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6008 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6009 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6010 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6011 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6013 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6014 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6015 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6016 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6018 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
/* ring size field is log2 of the size in dwords */
6019 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6021 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6022 IH_WPTR_OVERFLOW_CLEAR |
6025 if (rdev->wb.enabled)
6026 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6028 /* set the writeback address whether it's enabled or not */
6029 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6030 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6032 WREG32(IH_RB_CNTL, ih_rb_cntl);
6034 /* set rptr, wptr to 0 */
6035 WREG32(IH_RB_RPTR, 0);
6036 WREG32(IH_RB_WPTR, 0);
6038 /* Default settings for IH_CNTL (disabled at first) */
6039 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6040 /* RPTR_REARM only works if msi's are enabled */
6041 if (rdev->msi_enabled)
6042 ih_cntl |= RPTR_REARM;
6043 WREG32(IH_CNTL, ih_cntl);
6045 /* force the active interrupt state to all disabled */
6046 cik_disable_interrupt_state(rdev);
6048 pci_set_master(rdev->pdev);
6051 cik_enable_interrupts(rdev);
6057 * cik_irq_set - enable/disable interrupt sources
6059 * @rdev: radeon_device pointer
6061 * Enable interrupt sources on the GPU (vblanks, hpd,
6063 * Returns 0 for success, errors for failure.
6065 int cik_irq_set(struct radeon_device *rdev)
6068 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6069 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6070 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6071 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6072 u32 grbm_int_cntl = 0;
6073 u32 dma_cntl, dma_cntl1;
6076 if (!rdev->irq.installed) {
6077 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6080 /* don't enable anything if the ih is disabled */
6081 if (!rdev->ih.enabled) {
6082 cik_disable_interrupts(rdev);
6083 /* force the active interrupt state to all disabled */
6084 cik_disable_interrupt_state(rdev);
6088 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6089 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6090 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6092 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6093 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6094 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6095 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6096 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6097 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6099 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6100 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6102 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6103 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6104 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6105 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6106 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6107 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6108 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6109 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6111 if (rdev->flags & RADEON_IS_IGP)
6112 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6113 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6115 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6116 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6118 /* enable CP interrupts on all rings */
6119 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6120 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6121 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6123 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6124 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6125 DRM_DEBUG("si_irq_set: sw int cp1\n");
6126 if (ring->me == 1) {
6127 switch (ring->pipe) {
6129 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6132 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6135 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6138 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6141 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6144 } else if (ring->me == 2) {
6145 switch (ring->pipe) {
6147 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6150 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6153 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6156 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6159 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6163 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6166 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6167 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6168 DRM_DEBUG("si_irq_set: sw int cp2\n");
6169 if (ring->me == 1) {
6170 switch (ring->pipe) {
6172 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6175 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6178 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6181 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6184 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6187 } else if (ring->me == 2) {
6188 switch (ring->pipe) {
6190 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6193 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6196 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6199 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6202 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6206 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6210 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6211 DRM_DEBUG("cik_irq_set: sw int dma\n");
6212 dma_cntl |= TRAP_ENABLE;
6215 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6216 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6217 dma_cntl1 |= TRAP_ENABLE;
6220 if (rdev->irq.crtc_vblank_int[0] ||
6221 atomic_read(&rdev->irq.pflip[0])) {
6222 DRM_DEBUG("cik_irq_set: vblank 0\n");
6223 crtc1 |= VBLANK_INTERRUPT_MASK;
6225 if (rdev->irq.crtc_vblank_int[1] ||
6226 atomic_read(&rdev->irq.pflip[1])) {
6227 DRM_DEBUG("cik_irq_set: vblank 1\n");
6228 crtc2 |= VBLANK_INTERRUPT_MASK;
6230 if (rdev->irq.crtc_vblank_int[2] ||
6231 atomic_read(&rdev->irq.pflip[2])) {
6232 DRM_DEBUG("cik_irq_set: vblank 2\n");
6233 crtc3 |= VBLANK_INTERRUPT_MASK;
6235 if (rdev->irq.crtc_vblank_int[3] ||
6236 atomic_read(&rdev->irq.pflip[3])) {
6237 DRM_DEBUG("cik_irq_set: vblank 3\n");
6238 crtc4 |= VBLANK_INTERRUPT_MASK;
6240 if (rdev->irq.crtc_vblank_int[4] ||
6241 atomic_read(&rdev->irq.pflip[4])) {
6242 DRM_DEBUG("cik_irq_set: vblank 4\n");
6243 crtc5 |= VBLANK_INTERRUPT_MASK;
6245 if (rdev->irq.crtc_vblank_int[5] ||
6246 atomic_read(&rdev->irq.pflip[5])) {
6247 DRM_DEBUG("cik_irq_set: vblank 5\n");
6248 crtc6 |= VBLANK_INTERRUPT_MASK;
6250 if (rdev->irq.hpd[0]) {
6251 DRM_DEBUG("cik_irq_set: hpd 1\n");
6252 hpd1 |= DC_HPDx_INT_EN;
6254 if (rdev->irq.hpd[1]) {
6255 DRM_DEBUG("cik_irq_set: hpd 2\n");
6256 hpd2 |= DC_HPDx_INT_EN;
6258 if (rdev->irq.hpd[2]) {
6259 DRM_DEBUG("cik_irq_set: hpd 3\n");
6260 hpd3 |= DC_HPDx_INT_EN;
6262 if (rdev->irq.hpd[3]) {
6263 DRM_DEBUG("cik_irq_set: hpd 4\n");
6264 hpd4 |= DC_HPDx_INT_EN;
6266 if (rdev->irq.hpd[4]) {
6267 DRM_DEBUG("cik_irq_set: hpd 5\n");
6268 hpd5 |= DC_HPDx_INT_EN;
6270 if (rdev->irq.hpd[5]) {
6271 DRM_DEBUG("cik_irq_set: hpd 6\n");
6272 hpd6 |= DC_HPDx_INT_EN;
6275 if (rdev->irq.dpm_thermal) {
6276 DRM_DEBUG("dpm thermal\n");
6277 if (rdev->flags & RADEON_IS_IGP)
6278 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6280 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6283 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6285 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6286 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6288 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6289 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6290 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6291 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6292 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6293 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6294 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6295 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6297 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6299 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6300 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6301 if (rdev->num_crtc >= 4) {
6302 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6303 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6305 if (rdev->num_crtc >= 6) {
6306 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6307 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6310 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6311 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6312 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6313 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6314 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6315 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6317 if (rdev->flags & RADEON_IS_IGP)
6318 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6320 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6326 * cik_irq_ack - ack interrupt sources
6328 * @rdev: radeon_device pointer
6330 * Ack interrupt sources on the GPU (vblanks, hpd,
6331 * etc.) (CIK). Certain interrupts sources are sw
6332 * generated and do not require an explicit ack.
6334 static inline void cik_irq_ack(struct radeon_device *rdev)
6338 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6339 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6340 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6341 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6342 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6343 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6344 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6346 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6347 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6348 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6349 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6350 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6351 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6352 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6353 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6355 if (rdev->num_crtc >= 4) {
6356 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6357 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6358 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6359 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6360 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6361 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6362 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6363 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6366 if (rdev->num_crtc >= 6) {
6367 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6368 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6369 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6370 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6371 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6372 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6373 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6374 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6377 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6378 tmp = RREG32(DC_HPD1_INT_CONTROL);
6379 tmp |= DC_HPDx_INT_ACK;
6380 WREG32(DC_HPD1_INT_CONTROL, tmp);
6382 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6383 tmp = RREG32(DC_HPD2_INT_CONTROL);
6384 tmp |= DC_HPDx_INT_ACK;
6385 WREG32(DC_HPD2_INT_CONTROL, tmp);
6387 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6388 tmp = RREG32(DC_HPD3_INT_CONTROL);
6389 tmp |= DC_HPDx_INT_ACK;
6390 WREG32(DC_HPD3_INT_CONTROL, tmp);
6392 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6393 tmp = RREG32(DC_HPD4_INT_CONTROL);
6394 tmp |= DC_HPDx_INT_ACK;
6395 WREG32(DC_HPD4_INT_CONTROL, tmp);
6397 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6398 tmp = RREG32(DC_HPD5_INT_CONTROL);
6399 tmp |= DC_HPDx_INT_ACK;
6400 WREG32(DC_HPD5_INT_CONTROL, tmp);
6402 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6403 tmp = RREG32(DC_HPD5_INT_CONTROL);
6404 tmp |= DC_HPDx_INT_ACK;
6405 WREG32(DC_HPD6_INT_CONTROL, tmp);
6410 * cik_irq_disable - disable interrupts
6412 * @rdev: radeon_device pointer
6414 * Disable interrupts on the hw (CIK).
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq — the wait/ack pair was lost in the
	 * mangled listing; without it pending sources stay asserted */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
6426 * cik_irq_suspend - disable interrupts for suspend
6428 * @rdev: radeon_device pointer
6430 * Disable interrupts and stop the RLC (CIK).
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	/* the kernel-doc above promises "and stop the RLC"; restore the
	 * dropped call so the RLC is actually halted across suspend */
	cik_rlc_stop(rdev);
}
6440 * cik_irq_fini - tear down interrupt support
6442 * @rdev: radeon_device pointer
6444 * Disable interrupts on the hw and free the IH ring
6446 * Used for driver unload.
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw first, then release the IH ring memory */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6455 * cik_get_ih_wptr - get the IH ring buffer wptr
6457 * @rdev: radeon_device pointer
6459 * Get the IH ring buffer wptr from either the register
6460 * or the writeback memory buffer (CIK). Also check for
6461 * ring buffer overflow and deal with it.
6462 * Used by cik_irq_process().
6463 * Returns the value of the wptr.
6465 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6469 if (rdev->wb.enabled)
6470 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6472 wptr = RREG32(IH_RB_WPTR);
6474 if (wptr & RB_OVERFLOW) {
6475 /* When a ring buffer overflow happen start parsing interrupt
6476 * from the last not overwritten vector (wptr + 16). Hopefully
6477 * this should allow us to catchup.
6479 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6480 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6481 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6482 tmp = RREG32(IH_RB_CNTL);
6483 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6484 WREG32(IH_RB_CNTL, tmp);
6486 return (wptr & rdev->ih.ptr_mask);
6490 * Each IV ring entry is 128 bits:
6491 * [7:0] - interrupt source id
6493 * [59:32] - interrupt source data
6494 * [63:60] - reserved
6497 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6498 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6499 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6500 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6501 * PIPE_ID - ME0 0=3D
6502 * - ME1&2 compute dispatcher (4 pipes each)
6504 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6505 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6506 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6509 * [127:96] - reserved
6512 * cik_irq_process - interrupt handler
6514 * @rdev: radeon_device pointer
6516 * Interrupt hander (CIK). Walk the IH ring,
6517 * ack interrupts and schedule work to handle
6519 * Returns irq process return code.
6521 int cik_irq_process(struct radeon_device *rdev)
6523 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6524 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6527 u32 src_id, src_data, ring_id;
6528 u8 me_id, pipe_id, queue_id;
6530 bool queue_hotplug = false;
6531 bool queue_reset = false;
6532 u32 addr, status, mc_client;
6533 bool queue_thermal = false;
6535 if (!rdev->ih.enabled || rdev->shutdown)
6538 wptr = cik_get_ih_wptr(rdev);
6541 /* is somebody else already processing irqs? */
6542 if (atomic_xchg(&rdev->ih.lock, 1))
6545 rptr = rdev->ih.rptr;
6546 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6548 /* Order reading of wptr vs. reading of IH ring data */
6551 /* display interrupts */
6554 while (rptr != wptr) {
6555 /* wptr/rptr are in bytes! */
6556 ring_index = rptr / 4;
6557 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6558 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6559 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6562 case 1: /* D1 vblank/vline */
6564 case 0: /* D1 vblank */
6565 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6566 if (rdev->irq.crtc_vblank_int[0]) {
6567 drm_handle_vblank(rdev->ddev, 0);
6568 rdev->pm.vblank_sync = true;
6569 wake_up(&rdev->irq.vblank_queue);
6571 if (atomic_read(&rdev->irq.pflip[0]))
6572 radeon_crtc_handle_flip(rdev, 0);
6573 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6574 DRM_DEBUG("IH: D1 vblank\n");
6577 case 1: /* D1 vline */
6578 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6579 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6580 DRM_DEBUG("IH: D1 vline\n");
6584 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6588 case 2: /* D2 vblank/vline */
6590 case 0: /* D2 vblank */
6591 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6592 if (rdev->irq.crtc_vblank_int[1]) {
6593 drm_handle_vblank(rdev->ddev, 1);
6594 rdev->pm.vblank_sync = true;
6595 wake_up(&rdev->irq.vblank_queue);
6597 if (atomic_read(&rdev->irq.pflip[1]))
6598 radeon_crtc_handle_flip(rdev, 1);
6599 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6600 DRM_DEBUG("IH: D2 vblank\n");
6603 case 1: /* D2 vline */
6604 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6605 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6606 DRM_DEBUG("IH: D2 vline\n");
6610 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6614 case 3: /* D3 vblank/vline */
6616 case 0: /* D3 vblank */
6617 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6618 if (rdev->irq.crtc_vblank_int[2]) {
6619 drm_handle_vblank(rdev->ddev, 2);
6620 rdev->pm.vblank_sync = true;
6621 wake_up(&rdev->irq.vblank_queue);
6623 if (atomic_read(&rdev->irq.pflip[2]))
6624 radeon_crtc_handle_flip(rdev, 2);
6625 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6626 DRM_DEBUG("IH: D3 vblank\n");
6629 case 1: /* D3 vline */
6630 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6631 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6632 DRM_DEBUG("IH: D3 vline\n");
6636 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6640 case 4: /* D4 vblank/vline */
6642 case 0: /* D4 vblank */
6643 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6644 if (rdev->irq.crtc_vblank_int[3]) {
6645 drm_handle_vblank(rdev->ddev, 3);
6646 rdev->pm.vblank_sync = true;
6647 wake_up(&rdev->irq.vblank_queue);
6649 if (atomic_read(&rdev->irq.pflip[3]))
6650 radeon_crtc_handle_flip(rdev, 3);
6651 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6652 DRM_DEBUG("IH: D4 vblank\n");
6655 case 1: /* D4 vline */
6656 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6657 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6658 DRM_DEBUG("IH: D4 vline\n");
6662 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6666 case 5: /* D5 vblank/vline */
6668 case 0: /* D5 vblank */
6669 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6670 if (rdev->irq.crtc_vblank_int[4]) {
6671 drm_handle_vblank(rdev->ddev, 4);
6672 rdev->pm.vblank_sync = true;
6673 wake_up(&rdev->irq.vblank_queue);
6675 if (atomic_read(&rdev->irq.pflip[4]))
6676 radeon_crtc_handle_flip(rdev, 4);
6677 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6678 DRM_DEBUG("IH: D5 vblank\n");
6681 case 1: /* D5 vline */
6682 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6683 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6684 DRM_DEBUG("IH: D5 vline\n");
6688 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6692 case 6: /* D6 vblank/vline */
6694 case 0: /* D6 vblank */
6695 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6696 if (rdev->irq.crtc_vblank_int[5]) {
6697 drm_handle_vblank(rdev->ddev, 5);
6698 rdev->pm.vblank_sync = true;
6699 wake_up(&rdev->irq.vblank_queue);
6701 if (atomic_read(&rdev->irq.pflip[5]))
6702 radeon_crtc_handle_flip(rdev, 5);
6703 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6704 DRM_DEBUG("IH: D6 vblank\n");
6707 case 1: /* D6 vline */
6708 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6709 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6710 DRM_DEBUG("IH: D6 vline\n");
6714 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6718 case 42: /* HPD hotplug */
6721 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6722 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6723 queue_hotplug = true;
6724 DRM_DEBUG("IH: HPD1\n");
6728 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6729 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6730 queue_hotplug = true;
6731 DRM_DEBUG("IH: HPD2\n");
6735 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6736 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6737 queue_hotplug = true;
6738 DRM_DEBUG("IH: HPD3\n");
6742 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6743 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6744 queue_hotplug = true;
6745 DRM_DEBUG("IH: HPD4\n");
6749 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6750 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6751 queue_hotplug = true;
6752 DRM_DEBUG("IH: HPD5\n");
6756 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6757 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6758 queue_hotplug = true;
6759 DRM_DEBUG("IH: HPD6\n");
6763 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6768 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6769 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6773 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6774 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6775 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6776 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6777 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
6779 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6781 cik_vm_decode_fault(rdev, status, addr, mc_client);
6782 /* reset addr and status */
6783 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6785 case 176: /* GFX RB CP_INT */
6786 case 177: /* GFX IB CP_INT */
6787 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6789 case 181: /* CP EOP event */
6790 DRM_DEBUG("IH: CP EOP\n");
6791 /* XXX check the bitfield order! */
6792 me_id = (ring_id & 0x60) >> 5;
6793 pipe_id = (ring_id & 0x18) >> 3;
6794 queue_id = (ring_id & 0x7) >> 0;
6797 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6801 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
6802 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6803 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
6804 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6808 case 184: /* CP Privileged reg access */
6809 DRM_ERROR("Illegal register access in command stream\n");
6810 /* XXX check the bitfield order! */
6811 me_id = (ring_id & 0x60) >> 5;
6812 pipe_id = (ring_id & 0x18) >> 3;
6813 queue_id = (ring_id & 0x7) >> 0;
6816 /* This results in a full GPU reset, but all we need to do is soft
6817 * reset the CP for gfx
6831 case 185: /* CP Privileged inst */
6832 DRM_ERROR("Illegal instruction in command stream\n");
6833 /* XXX check the bitfield order! */
6834 me_id = (ring_id & 0x60) >> 5;
6835 pipe_id = (ring_id & 0x18) >> 3;
6836 queue_id = (ring_id & 0x7) >> 0;
6839 /* This results in a full GPU reset, but all we need to do is soft
6840 * reset the CP for gfx
6854 case 224: /* SDMA trap event */
6855 /* XXX check the bitfield order! */
6856 me_id = (ring_id & 0x3) >> 0;
6857 queue_id = (ring_id & 0xc) >> 2;
6858 DRM_DEBUG("IH: SDMA trap\n");
6863 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6876 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6888 case 230: /* thermal low to high */
6889 DRM_DEBUG("IH: thermal low to high\n");
6890 rdev->pm.dpm.thermal.high_to_low = false;
6891 queue_thermal = true;
6893 case 231: /* thermal high to low */
6894 DRM_DEBUG("IH: thermal high to low\n");
6895 rdev->pm.dpm.thermal.high_to_low = true;
6896 queue_thermal = true;
6898 case 233: /* GUI IDLE */
6899 DRM_DEBUG("IH: GUI idle\n");
6901 case 241: /* SDMA Privileged inst */
6902 case 247: /* SDMA Privileged inst */
6903 DRM_ERROR("Illegal instruction in SDMA command stream\n");
6904 /* XXX check the bitfield order! */
6905 me_id = (ring_id & 0x3) >> 0;
6906 queue_id = (ring_id & 0xc) >> 2;
6941 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6945 /* wptr/rptr are in bytes! */
6947 rptr &= rdev->ih.ptr_mask;
6950 schedule_work(&rdev->hotplug_work);
6952 schedule_work(&rdev->reset_work);
6954 schedule_work(&rdev->pm.dpm.thermal.work);
6955 rdev->ih.rptr = rptr;
6956 WREG32(IH_RB_RPTR, rdev->ih.rptr);
6957 atomic_set(&rdev->ih.lock, 0);
6959 /* make sure wptr hasn't changed while processing */
6960 wptr = cik_get_ih_wptr(rdev);
6968 * startup/shutdown callbacks
6971 * cik_startup - program the asic to a functional state
6973 * @rdev: radeon_device pointer
6975 * Programs the asic to a functional state (CIK).
6976 * Called by cik_init() and cik_resume().
6977 * Returns 0 for success, error for failure.
6979 static int cik_startup(struct radeon_device *rdev)
6981 struct radeon_ring *ring;
6984 /* enable pcie gen2/3 link */
6985 cik_pcie_gen3_enable(rdev);
6987 cik_program_aspm(rdev);
6989 /* scratch needs to be initialized before MC */
6990 r = r600_vram_scratch_init(rdev);
6994 cik_mc_program(rdev);
6996 if (rdev->flags & RADEON_IS_IGP) {
6997 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6998 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6999 r = cik_init_microcode(rdev);
7001 DRM_ERROR("Failed to load firmware!\n");
7006 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7007 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7009 r = cik_init_microcode(rdev);
7011 DRM_ERROR("Failed to load firmware!\n");
7016 r = ci_mc_load_microcode(rdev);
7018 DRM_ERROR("Failed to load MC firmware!\n");
7023 r = cik_pcie_gart_enable(rdev);
7028 /* allocate rlc buffers */
7029 if (rdev->flags & RADEON_IS_IGP) {
7030 if (rdev->family == CHIP_KAVERI) {
7031 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7032 rdev->rlc.reg_list_size =
7033 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7035 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7036 rdev->rlc.reg_list_size =
7037 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7040 rdev->rlc.cs_data = ci_cs_data;
7041 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7042 r = sumo_rlc_init(rdev);
7044 DRM_ERROR("Failed to init rlc BOs!\n");
7048 /* allocate wb buffer */
7049 r = radeon_wb_init(rdev);
7053 /* allocate mec buffers */
7054 r = cik_mec_init(rdev);
7056 DRM_ERROR("Failed to init MEC BOs!\n");
7060 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7062 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7066 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7068 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7072 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7074 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7078 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7080 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7084 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7086 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7090 r = radeon_uvd_resume(rdev);
7092 r = uvd_v4_2_resume(rdev);
7094 r = radeon_fence_driver_start_ring(rdev,
7095 R600_RING_TYPE_UVD_INDEX);
7097 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7101 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7104 if (!rdev->irq.installed) {
7105 r = radeon_irq_kms_init(rdev);
7110 r = cik_irq_init(rdev);
7112 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7113 radeon_irq_kms_fini(rdev);
7118 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7119 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7120 CP_RB0_RPTR, CP_RB0_WPTR,
7125 /* set up the compute queues */
7126 /* type-2 packets are deprecated on MEC, use type-3 instead */
7127 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7128 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7129 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7130 PACKET3(PACKET3_NOP, 0x3FFF));
7133 ring->me = 1; /* first MEC */
7134 ring->pipe = 0; /* first pipe */
7135 ring->queue = 0; /* first queue */
7136 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7138 /* type-2 packets are deprecated on MEC, use type-3 instead */
7139 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7140 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7141 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7142 PACKET3(PACKET3_NOP, 0x3FFF));
7145 /* dGPU only have 1 MEC */
7146 ring->me = 1; /* first MEC */
7147 ring->pipe = 0; /* first pipe */
7148 ring->queue = 1; /* second queue */
7149 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7151 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7152 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7153 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7154 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7155 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7159 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7160 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7161 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7162 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7163 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7167 r = cik_cp_resume(rdev);
7171 r = cik_sdma_resume(rdev);
7175 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7176 if (ring->ring_size) {
7177 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7178 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7181 r = uvd_v1_0_init(rdev);
7183 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7186 r = radeon_ib_pool_init(rdev);
7188 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7192 r = radeon_vm_manager_init(rdev);
7194 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7198 r = dce6_audio_init(rdev);
7206 * cik_resume - resume the asic to a functional state
7208 * @rdev: radeon_device pointer
7210 * Programs the asic to a functional state (CIK).
7212 * Returns 0 for success, error for failure.
7214 int cik_resume(struct radeon_device *rdev)
7219 atom_asic_init(rdev->mode_info.atom_context);
7221 /* init golden registers */
7222 cik_init_golden_registers(rdev);
7224 rdev->accel_working = true;
7225 r = cik_startup(rdev);
7227 DRM_ERROR("cik startup failed on resume\n");
7228 rdev->accel_working = false;
7237 * cik_suspend - suspend the asic
7239 * @rdev: radeon_device pointer
7241 * Bring the chip into a state suitable for suspend (CIK).
7242 * Called at suspend.
7243 * Returns 0 for success.
7245 int cik_suspend(struct radeon_device *rdev)
7247 dce6_audio_fini(rdev);
7248 radeon_vm_manager_fini(rdev);
7249 cik_cp_enable(rdev, false);
7250 cik_sdma_enable(rdev, false);
7251 uvd_v1_0_fini(rdev);
7252 radeon_uvd_suspend(rdev);
7255 cik_irq_suspend(rdev);
7256 radeon_wb_disable(rdev);
7257 cik_pcie_gart_disable(rdev);
7261 /* Plan is to move initialization in that function and use
7262 * helper function so that radeon_device_init pretty much
7263 * do nothing more than calling asic specific function. This
7264 * should also allow to remove a bunch of callback function
7268 * cik_init - asic specific driver and hw init
7270 * @rdev: radeon_device pointer
7272 * Setup asic specific driver variables and program the hw
7273 * to a functional state (CIK).
7274 * Called at driver startup.
7275 * Returns 0 for success, errors for failure.
7277 int cik_init(struct radeon_device *rdev)
7279 struct radeon_ring *ring;
7283 if (!radeon_get_bios(rdev)) {
7284 if (ASIC_IS_AVIVO(rdev))
7287 /* Must be an ATOMBIOS */
7288 if (!rdev->is_atom_bios) {
7289 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7292 r = radeon_atombios_init(rdev);
7296 /* Post card if necessary */
7297 if (!radeon_card_posted(rdev)) {
7299 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7302 DRM_INFO("GPU not posted. posting now...\n");
7303 atom_asic_init(rdev->mode_info.atom_context);
7305 /* init golden registers */
7306 cik_init_golden_registers(rdev);
7307 /* Initialize scratch registers */
7308 cik_scratch_init(rdev);
7309 /* Initialize surface registers */
7310 radeon_surface_init(rdev);
7311 /* Initialize clocks */
7312 radeon_get_clock_info(rdev->ddev);
7315 r = radeon_fence_driver_init(rdev);
7319 /* initialize memory controller */
7320 r = cik_mc_init(rdev);
7323 /* Memory manager */
7324 r = radeon_bo_init(rdev);
7328 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7329 ring->ring_obj = NULL;
7330 r600_ring_init(rdev, ring, 1024 * 1024);
7332 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7333 ring->ring_obj = NULL;
7334 r600_ring_init(rdev, ring, 1024 * 1024);
7335 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7339 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7340 ring->ring_obj = NULL;
7341 r600_ring_init(rdev, ring, 1024 * 1024);
7342 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7346 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7347 ring->ring_obj = NULL;
7348 r600_ring_init(rdev, ring, 256 * 1024);
7350 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7351 ring->ring_obj = NULL;
7352 r600_ring_init(rdev, ring, 256 * 1024);
7354 r = radeon_uvd_init(rdev);
7356 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7357 ring->ring_obj = NULL;
7358 r600_ring_init(rdev, ring, 4096);
7361 rdev->ih.ring_obj = NULL;
7362 r600_ih_ring_init(rdev, 64 * 1024);
7364 r = r600_pcie_gart_init(rdev);
7368 rdev->accel_working = true;
7369 r = cik_startup(rdev);
7371 dev_err(rdev->dev, "disabling GPU acceleration\n");
7373 cik_sdma_fini(rdev);
7375 sumo_rlc_fini(rdev);
7377 radeon_wb_fini(rdev);
7378 radeon_ib_pool_fini(rdev);
7379 radeon_vm_manager_fini(rdev);
7380 radeon_irq_kms_fini(rdev);
7381 cik_pcie_gart_fini(rdev);
7382 rdev->accel_working = false;
7385 /* Don't start up if the MC ucode is missing.
7386 * The default clocks and voltages before the MC ucode
7387 * is loaded are not sufficient for advanced operations.
7389 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7390 DRM_ERROR("radeon: MC ucode required for NI+.\n");
/* NOTE(review): this excerpt is missing interleaved lines (the kernel-doc
 * delimiters, the function braces and several teardown calls such as the CP
 * fini) -- verify against the full cik.c before relying on it. */
7398 * cik_fini - asic specific driver and hw fini
7400 * @rdev: radeon_device pointer
7402 * Tear down the asic specific driver variables and program the hw
7403 * to an idle state (CIK).
7404 * Called at driver unload.
7406 void cik_fini(struct radeon_device *rdev)
/* Engine/IP teardown first (SDMA, RLC)... */
7409 cik_sdma_fini(rdev);
7413 sumo_rlc_fini(rdev);
/* ...then the writeback/VM/IB/IRQ infrastructure those engines used... */
7415 radeon_wb_fini(rdev);
7416 radeon_vm_manager_fini(rdev);
7417 radeon_ib_pool_fini(rdev);
7418 radeon_irq_kms_fini(rdev);
/* UVD: stop the hw block (v1.0 fini) before releasing its driver state. */
7419 uvd_v1_0_fini(rdev);
7420 radeon_uvd_fini(rdev);
7421 cik_pcie_gart_fini(rdev);
7422 r600_vram_scratch_fini(rdev);
/* ...finally GEM objects, fences, BO manager and atombios software state. */
7423 radeon_gem_fini(rdev);
7424 radeon_fence_driver_fini(rdev);
7425 radeon_bo_fini(rdev);
7426 radeon_atombios_fini(rdev);
7431 /* display watermark setup */
7433 * dce8_line_buffer_adjust - Set up the line buffer
7435 * @rdev: radeon_device pointer
7436 * @radeon_crtc: the selected display controller
7437 * @mode: the current display mode on the selected display
7440 * Set up the line buffer allocation for
7441 * the selected display controller (CIK).
7442 * Returns the line buffer size in pixels.
7444 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7445 struct radeon_crtc *radeon_crtc,
7446 struct drm_display_mode *mode)
/* NOTE(review): the excerpt drops lines here (function braces, the `tmp`
 * partition-select assignments for each width bucket and the return
 * statements) -- verify against the full source. */
7448 u32 tmp, buffer_alloc, i;
/* Each pipe's DMIF buffer-control register is spaced 0x20 apart. */
7449 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7452 * There are 6 line buffers, one for each display controller.
7453 * There are 3 partitions per LB. Select the number of partitions
7454 * to enable based on the display width. For display widths larger
7455 * than 4096, you need to use 2 display controllers and combine
7456 * them using the stereo blender.
7458 if (radeon_crtc->base.enabled && mode) {
7459 if (mode->crtc_hdisplay < 1920) {
7462 } else if (mode->crtc_hdisplay < 2560) {
7465 } else if (mode->crtc_hdisplay < 4096) {
/* IGPs are granted fewer DMIF buffers than discrete parts (2 vs 4). */
7467 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7469 DRM_DEBUG_KMS("Mode too big for LB!\n");
7471 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
/* Program partition config; 0x6B0 appears to be the fixed per-LB size in
 * this unit -- TODO confirm against the DCE8 register docs. */
7478 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7479 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7481 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7482 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
/* Bounded busy-wait for the hw to acknowledge the buffer allocation. */
7483 for (i = 0; i < rdev->usec_timeout; i++) {
7484 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7485 DMIF_BUFFERS_ALLOCATED_COMPLETED)
7490 if (radeon_crtc->base.enabled && mode) {
7502 /* controller not enabled, so no lb used */
7507 * cik_get_number_of_dram_channels - get the number of dram channels
7509 * @rdev: radeon_device pointer
7511 * Look up the number of video ram channels (CIK).
7512 * Used for display watermark bandwidth calculations
7513 * Returns the number of dram channels
7515 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
/* Decode the channel count from the MC's shared channel-map register. */
7517 u32 tmp = RREG32(MC_SHARED_CHMAP);
/* NOTE(review): the switch cases (per-encoding channel counts) and the
 * closing brace are missing from this excerpt. */
7519 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* Input parameters for the DCE8 display watermark calculations below.
 * NOTE(review): the closing "};" is missing from this excerpt. */
7542 struct dce8_wm_params {
7543 u32 dram_channels; /* number of dram channels */
7544 u32 yclk; /* bandwidth per dram data pin in kHz */
7545 u32 sclk; /* engine clock in kHz */
7546 u32 disp_clk; /* display clock in kHz */
7547 u32 src_width; /* viewport width */
7548 u32 active_time; /* active display time in ns */
7549 u32 blank_time; /* blank time in ns */
7550 bool interlaced; /* mode is interlaced */
7551 fixed20_12 vsc; /* vertical scale ratio */
7552 u32 num_heads; /* number of active crtcs */
7553 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7554 u32 lb_size; /* line buffer allocated to pipe */
7555 u32 vtaps; /* vertical scaler taps */
7559 * dce8_dram_bandwidth - get the dram bandwidth
7561 * @wm: watermark calculation data
7563 * Calculate the raw dram bandwidth (CIK).
7564 * Used for display watermark bandwidth calculations
7565 * Returns the dram bandwidth in MBytes/s
7567 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
/* NOTE(review): braces and the `fixed20_12 a;` declaration are missing
 * from this excerpt. */
7569 /* Calculate raw DRAM Bandwidth */
7570 fixed20_12 dram_efficiency; /* 0.7 */
7571 fixed20_12 yclk, dram_channels, bandwidth;
/* yclk is in kHz; dividing by 1000 keeps the final result in MBytes/s. */
7574 a.full = dfixed_const(1000);
7575 yclk.full = dfixed_const(wm->yclk);
7576 yclk.full = dfixed_div(yclk, a);
/* x4: bytes moved per channel per yclk -- TODO confirm bus-width factor. */
7577 dram_channels.full = dfixed_const(wm->dram_channels * 4);
/* dram_efficiency = 7 / 10 = 0.7 in 20.12 fixed point. */
7578 a.full = dfixed_const(10);
7579 dram_efficiency.full = dfixed_const(7);
7580 dram_efficiency.full = dfixed_div(dram_efficiency, a);
7581 bandwidth.full = dfixed_mul(dram_channels, yclk);
7582 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7584 return dfixed_trunc(bandwidth);
7588 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7590 * @wm: watermark calculation data
7592 * Calculate the dram bandwidth used for display (CIK).
7593 * Used for display watermark bandwidth calculations
7594 * Returns the dram bandwidth for display in MBytes/s
7596 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
/* Same shape as dce8_dram_bandwidth() but scaled by the share of DRAM
 * bandwidth the display is allowed to consume (worst case 0.3).
 * NOTE(review): braces and the `fixed20_12 a;` declaration are missing
 * from this excerpt. */
7598 /* Calculate DRAM Bandwidth and the part allocated to display. */
7599 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7600 fixed20_12 yclk, dram_channels, bandwidth;
/* Convert yclk (kHz) so the product comes out in MBytes/s. */
7603 a.full = dfixed_const(1000);
7604 yclk.full = dfixed_const(wm->yclk);
7605 yclk.full = dfixed_div(yclk, a);
7606 dram_channels.full = dfixed_const(wm->dram_channels * 4);
/* disp_dram_allocation = 3 / 10 = 0.3 (pessimistic share). */
7607 a.full = dfixed_const(10);
7608 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7609 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7610 bandwidth.full = dfixed_mul(dram_channels, yclk);
7611 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7613 return dfixed_trunc(bandwidth);
7617 * dce8_data_return_bandwidth - get the data return bandwidth
7619 * @wm: watermark calculation data
7621 * Calculate the data return bandwidth used for display (CIK).
7622 * Used for display watermark bandwidth calculations
7623 * Returns the data return bandwidth in MBytes/s
7625 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
/* NOTE(review): braces and the `fixed20_12 a;` declaration are missing
 * from this excerpt. */
7627 /* Calculate the display Data return Bandwidth */
7628 fixed20_12 return_efficiency; /* 0.8 */
7629 fixed20_12 sclk, bandwidth;
/* sclk is in kHz; /1000 keeps the result in MBytes/s. */
7632 a.full = dfixed_const(1000);
7633 sclk.full = dfixed_const(wm->sclk);
7634 sclk.full = dfixed_div(sclk, a);
/* return_efficiency = 8 / 10 = 0.8. */
7635 a.full = dfixed_const(10);
7636 return_efficiency.full = dfixed_const(8);
7637 return_efficiency.full = dfixed_div(return_efficiency, a);
/* 32 bytes returned per engine clock, derated by the efficiency factor. */
7638 a.full = dfixed_const(32);
7639 bandwidth.full = dfixed_mul(a, sclk);
7640 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7642 return dfixed_trunc(bandwidth);
7646 * dce8_dmif_request_bandwidth - get the dmif bandwidth
7648 * @wm: watermark calculation data
7650 * Calculate the dmif bandwidth used for display (CIK).
7651 * Used for display watermark bandwidth calculations
7652 * Returns the dmif bandwidth in MBytes/s
7654 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
/* NOTE(review): braces and the `fixed20_12 a, b;` declaration are missing
 * from this excerpt. */
7656 /* Calculate the DMIF Request Bandwidth */
7657 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7658 fixed20_12 disp_clk, bandwidth;
/* disp_clk is in kHz; /1000 keeps the result in MBytes/s. */
7661 a.full = dfixed_const(1000);
7662 disp_clk.full = dfixed_const(wm->disp_clk);
7663 disp_clk.full = dfixed_div(disp_clk, a);
/* 32 bytes requested per display clock... */
7664 a.full = dfixed_const(32);
7665 b.full = dfixed_mul(a, disp_clk);
/* ...derated by the request efficiency 8 / 10 = 0.8. */
7667 a.full = dfixed_const(10);
7668 disp_clk_request_efficiency.full = dfixed_const(8);
7669 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7671 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7673 return dfixed_trunc(bandwidth);
7677 * dce8_available_bandwidth - get the min available bandwidth
7679 * @wm: watermark calculation data
7681 * Calculate the min available bandwidth used for display (CIK).
7682 * Used for display watermark bandwidth calculations
7683 * Returns the min available bandwidth in MBytes/s
7685 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
/* NOTE(review): function braces are missing from this excerpt. */
7687 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
/* The effective ceiling is whichever of the three paths (DRAM, data
 * return, DMIF request) is the tightest. */
7688 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7689 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7690 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7692 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7696 * dce8_average_bandwidth - get the average available bandwidth
7698 * @wm: watermark calculation data
7700 * Calculate the average available bandwidth used for display (CIK).
7701 * Used for display watermark bandwidth calculations
7702 * Returns the average available bandwidth in MBytes/s
7704 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
/* NOTE(review): braces, part of this comment block and the `bpp`/`a`
 * declarations are missing from this excerpt. */
7706 /* Calculate the display mode Average Bandwidth
7707 * DisplayMode should contain the source and destination dimensions,
7711 fixed20_12 line_time;
7712 fixed20_12 src_width;
7713 fixed20_12 bandwidth;
/* line_time = (active + blank) ns converted to us via /1000. */
7716 a.full = dfixed_const(1000);
7717 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7718 line_time.full = dfixed_div(line_time, a);
7719 bpp.full = dfixed_const(wm->bytes_per_pixel);
7720 src_width.full = dfixed_const(wm->src_width);
/* bytes fetched per line (width * bpp * vertical scale) over line time. */
7721 bandwidth.full = dfixed_mul(src_width, bpp);
7722 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7723 bandwidth.full = dfixed_div(bandwidth, line_time);
7725 return dfixed_trunc(bandwidth);
7729 * dce8_latency_watermark - get the latency watermark
7731 * @wm: watermark calculation data
7733 * Calculate the latency watermark (CIK).
7734 * Used for display watermark bandwidth calculations
7735 * Returns the latency watermark in ns
7737 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
/* NOTE(review): braces, the `fixed20_12 a, b, c;` declarations, the
 * num_heads==0 early return, one condition line and the final
 * `return latency;` path are missing from this excerpt. */
7739 /* First calculate the latency in ns */
7740 u32 mc_latency = 2000; /* 2000 ns. */
7741 u32 available_bandwidth = dce8_available_bandwidth(wm);
/* Time (ns) to return a worst-case chunk / a cursor line pair, given the
 * available bandwidth in MBytes/s. */
7742 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7743 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7744 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
/* Account for the data the other active heads pull in ahead of us. */
7745 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7746 (wm->num_heads * cursor_line_pair_return_time);
7747 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7748 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7749 u32 tmp, dmif_size = 12288;
7752 if (wm->num_heads == 0)
/* Heavy downscale / multi-tap / interlace needs up to 4 source lines per
 * output line; otherwise 2 suffice. */
7755 a.full = dfixed_const(2);
7756 b.full = dfixed_const(1);
7757 if ((wm->vsc.full > a.full) ||
7758 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7760 ((wm->vsc.full >= a.full) && wm->interlaced))
7761 max_src_lines_per_dst_line = 4;
7763 max_src_lines_per_dst_line = 2;
/* a = per-head share of the available bandwidth. */
7765 a.full = dfixed_const(available_bandwidth);
7766 b.full = dfixed_const(wm->num_heads);
7767 a.full = dfixed_div(a, b);
/* b = DMIF-size-limited bandwidth over the (mc_latency + 512) window. */
7769 b.full = dfixed_const(mc_latency + 512);
7770 c.full = dfixed_const(wm->disp_clk);
7771 b.full = dfixed_div(b, c);
7773 c.full = dfixed_const(dmif_size);
7774 b.full = dfixed_div(c, b);
7776 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
/* The line buffer can also only fill at disp_clk * bytes_per_pixel. */
7778 b.full = dfixed_const(1000);
7779 c.full = dfixed_const(wm->disp_clk);
7780 b.full = dfixed_div(c, b);
7781 c.full = dfixed_const(wm->bytes_per_pixel);
7782 b.full = dfixed_mul(b, c);
7784 lb_fill_bw = min(tmp, dfixed_trunc(b));
/* line_fill_time = bytes needed per output line / fill bandwidth. */
7786 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7787 b.full = dfixed_const(1000);
7788 c.full = dfixed_const(lb_fill_bw);
7789 b.full = dfixed_div(c, b);
7790 a.full = dfixed_div(a, b);
7791 line_fill_time = dfixed_trunc(a);
/* If the LB cannot refill within the active period, pad the watermark by
 * the shortfall. */
7793 if (line_fill_time < wm->active_time)
7796 return latency + (line_fill_time - wm->active_time);
7801 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7802 * average and available dram bandwidth
7804 * @wm: watermark calculation data
7806 * Check if the display average bandwidth fits in the display
7807 * dram bandwidth (CIK).
7808 * Used for display watermark bandwidth calculations
7809 * Returns true if the display fits, false if not.
7811 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
/* NOTE(review): braces and the true/false return statements are missing
 * from this excerpt. The DRAM budget is divided evenly across heads. */
7813 if (dce8_average_bandwidth(wm) <=
7814 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7821 * dce8_average_bandwidth_vs_available_bandwidth - check
7822 * average and available bandwidth
7824 * @wm: watermark calculation data
7826 * Check if the display average bandwidth fits in the display
7827 * available bandwidth (CIK).
7828 * Used for display watermark bandwidth calculations
7829 * Returns true if the display fits, false if not.
7831 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
/* NOTE(review): braces and the true/false return statements are missing
 * from this excerpt. The available budget is divided evenly across heads. */
7833 if (dce8_average_bandwidth(wm) <=
7834 (dce8_available_bandwidth(wm) / wm->num_heads))
7841 * dce8_check_latency_hiding - check latency hiding
7843 * @wm: watermark calculation data
7845 * Check latency hiding (CIK).
7846 * Used for display watermark bandwidth calculations
7847 * Returns true if the display fits, false if not.
7849 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
/* NOTE(review): braces, the `latency_hiding`/`fixed20_12 a` declarations,
 * an else branch and the true/false returns are missing from this excerpt. */
7851 u32 lb_partitions = wm->lb_size / wm->src_width;
7852 u32 line_time = wm->active_time + wm->blank_time;
7853 u32 latency_tolerant_lines;
/* Downscaling (vsc > 1) or too few LB partitions for the scaler taps
 * leaves only one line of slack; otherwise two lines can hide latency. */
7857 a.full = dfixed_const(1);
7858 if (wm->vsc.full > a.full)
7859 latency_tolerant_lines = 1;
7861 if (lb_partitions <= (wm->vtaps + 1))
7862 latency_tolerant_lines = 1;
7864 latency_tolerant_lines = 2;
/* Total time (ns) the line buffer can absorb before underflow. */
7867 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7869 if (dce8_latency_watermark(wm) <= latency_hiding)
7876 * dce8_program_watermarks - program display watermarks
7878 * @rdev: radeon_device pointer
7879 * @radeon_crtc: the selected display controller
7880 * @lb_size: line buffer size
7881 * @num_heads: number of display controllers in use
7883 * Calculate and program the display watermarks for the
7884 * selected display controller (CIK).
7886 static void dce8_program_watermarks(struct radeon_device *rdev,
7887 struct radeon_crtc *radeon_crtc,
7888 u32 lb_size, u32 num_heads)
/* NOTE(review): braces and several declarations/assignments
 * (pixel_period, line_time, tmp, wm_mask, the dpm yclk/sclk lvalues and
 * the rmx vsc override) are missing from this excerpt. */
7890 struct drm_display_mode *mode = &radeon_crtc->base.mode;
7891 struct dce8_wm_params wm_low, wm_high;
7894 u32 latency_watermark_a = 0, latency_watermark_b = 0;
7897 if (radeon_crtc->base.enabled && num_heads && mode) {
/* mode->clock is presumably in kHz, making pixel_period ns -- confirm. */
7898 pixel_period = 1000000 / (u32)mode->clock;
7899 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7901 /* watermark for high clocks */
/* With DPM, use the maximum (false = high) mclk/sclk; otherwise fall back
 * to the currently programmed clocks. The *10 scales the dpm units to
 * kHz -- TODO confirm. */
7902 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7903 rdev->pm.dpm_enabled) {
7905 radeon_dpm_get_mclk(rdev, false) * 10;
7907 radeon_dpm_get_sclk(rdev, false) * 10;
7909 wm_high.yclk = rdev->pm.current_mclk * 10;
7910 wm_high.sclk = rdev->pm.current_sclk * 10;
7913 wm_high.disp_clk = mode->clock;
7914 wm_high.src_width = mode->crtc_hdisplay;
7915 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7916 wm_high.blank_time = line_time - wm_high.active_time;
7917 wm_high.interlaced = false;
7918 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7919 wm_high.interlaced = true;
7920 wm_high.vsc = radeon_crtc->vsc;
7922 if (radeon_crtc->rmx_type != RMX_OFF)
7924 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7925 wm_high.lb_size = lb_size;
7926 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7927 wm_high.num_heads = num_heads;
7929 /* set for high clocks */
/* Clamp to the 16-bit field the LATENCY_* register macros accept. */
7930 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7932 /* possibly force display priority to high */
7933 /* should really do this at mode validation time... */
7934 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7935 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7936 !dce8_check_latency_hiding(&wm_high) ||
7937 (rdev->disp_priority == 2)) {
7938 DRM_DEBUG_KMS("force priority to high\n");
7941 /* watermark for low clocks */
/* Same computation with the minimum (true = low) clocks. */
7942 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7943 rdev->pm.dpm_enabled) {
7945 radeon_dpm_get_mclk(rdev, true) * 10;
7947 radeon_dpm_get_sclk(rdev, true) * 10;
7949 wm_low.yclk = rdev->pm.current_mclk * 10;
7950 wm_low.sclk = rdev->pm.current_sclk * 10;
7953 wm_low.disp_clk = mode->clock;
7954 wm_low.src_width = mode->crtc_hdisplay;
7955 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7956 wm_low.blank_time = line_time - wm_low.active_time;
7957 wm_low.interlaced = false;
7958 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7959 wm_low.interlaced = true;
7960 wm_low.vsc = radeon_crtc->vsc;
7962 if (radeon_crtc->rmx_type != RMX_OFF)
7964 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7965 wm_low.lb_size = lb_size;
7966 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7967 wm_low.num_heads = num_heads;
7969 /* set for low clocks */
7970 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7972 /* possibly force display priority to high */
7973 /* should really do this at mode validation time... */
7974 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7975 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7976 !dce8_check_latency_hiding(&wm_low) ||
7977 (rdev->disp_priority == 2)) {
7978 DRM_DEBUG_KMS("force priority to high\n");
/* Select watermark set 1, write watermark A, then set 2 for watermark B;
 * the original mask selection is restored afterwards. */
7983 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7985 tmp &= ~LATENCY_WATERMARK_MASK(3);
7986 tmp |= LATENCY_WATERMARK_MASK(1);
7987 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7988 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7989 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7990 LATENCY_HIGH_WATERMARK(line_time)));
7992 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7993 tmp &= ~LATENCY_WATERMARK_MASK(3);
7994 tmp |= LATENCY_WATERMARK_MASK(2);
7995 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7996 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7997 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7998 LATENCY_HIGH_WATERMARK(line_time)));
7999 /* restore original selection */
8000 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8002 /* save values for DPM */
8003 radeon_crtc->line_time = line_time;
8004 radeon_crtc->wm_high = latency_watermark_a;
8005 radeon_crtc->wm_low = latency_watermark_b;
8009 * dce8_bandwidth_update - program display watermarks
8011 * @rdev: radeon_device pointer
8013 * Calculate and program the display watermarks and line
8014 * buffer allocation (CIK).
8016 void dce8_bandwidth_update(struct radeon_device *rdev)
/* NOTE(review): braces, the `int i;` declaration and the head-count
 * increment inside the first loop are missing from this excerpt. */
8018 struct drm_display_mode *mode = NULL;
8019 u32 num_heads = 0, lb_size;
8022 radeon_update_display_priority(rdev);
/* First pass: count the enabled CRTCs (heads)... */
8024 for (i = 0; i < rdev->num_crtc; i++) {
8025 if (rdev->mode_info.crtcs[i]->base.enabled)
/* ...second pass: size each CRTC's line buffer, then program its
 * watermarks against the total head count. */
8028 for (i = 0; i < rdev->num_crtc; i++) {
8029 mode = &rdev->mode_info.crtcs[i]->base.mode;
8030 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8031 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8036 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8038 * @rdev: radeon_device pointer
8040 * Fetches a GPU clock counter snapshot (CIK).
8041 * Returns the 64 bit clock counter snapshot.
8043 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
/* NOTE(review): braces, the `uint64_t clock;` declaration and the
 * `return clock;` are missing from this excerpt. */
/* The mutex serializes the capture-latch write with the two-register read
 * so LSB and MSB come from the same snapshot. */
8047 mutex_lock(&rdev->gpu_clock_mutex);
8048 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8049 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8050 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8051 mutex_unlock(&rdev->gpu_clock_mutex);
/* Program one UVD clock (VCLK or DCLK): ask atombios for dividers for the
 * requested frequency, write the post divider, then poll for lock. */
8055 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8056 u32 cntl_reg, u32 status_reg)
/* NOTE(review): braces, the `int r, i; u32 tmp;` declarations, the error
 * return after the divider lookup and the timeout/return tail are missing
 * from this excerpt. */
8059 struct atom_clock_dividers dividers;
/* NOTE(review): `÷rs` below is HTML-entity damage ("&divide;") for
 * `&dividers` -- restore before compiling. */
8062 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8063 clock, false, ÷rs);
/* Disable the directed control, clear the old divider, set the new one. */
8067 tmp = RREG32_SMC(cntl_reg);
8068 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8069 tmp |= dividers.post_divider;
8070 WREG32_SMC(cntl_reg, tmp);
/* Poll (up to 100 iterations) for the clock status bit to assert. */
8072 for (i = 0; i < 100; i++) {
8073 if (RREG32_SMC(status_reg) & DCLK_STATUS)
/* Set both UVD clocks (VCLK then DCLK) via cik_set_uvd_clock().
 * NOTE(review): braces, the `int r = 0;` declaration, the error-check
 * returns and the final `return r;` are missing from this excerpt. */
8083 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8087 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8091 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
/* Attempt to bring the PCIe link up to gen2/gen3 speed, retraining and
 * re-running equalization where needed. Controlled by radeon.pcie_gen2. */
8095 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
/* NOTE(review): braces, early `return`s after each guard, the `int ret, i;
 * u16 tmp16;` declarations and several loop/branch closers are missing
 * from this excerpt. */
8097 struct pci_dev *root = rdev->pdev->bus->self;
8098 int bridge_pos, gpu_pos;
8099 u32 speed_cntl, mask, current_data_rate;
/* Bail out for module-param opt-out, IGPs (no external link) and
 * non-PCIe buses. */
8103 if (radeon_pcie_gen2 == 0)
8106 if (rdev->flags & RADEON_IS_IGP)
8109 if (!(rdev->flags & RADEON_IS_PCIE))
8112 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8116 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 (per the LC encoding
 * used below) -- nothing to do if the target rate is already active. */
8119 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8120 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8121 LC_CURRENT_DATA_RATE_SHIFT;
8122 if (mask & DRM_PCIE_SPEED_80) {
8123 if (current_data_rate == 2) {
8124 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8127 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8128 } else if (mask & DRM_PCIE_SPEED_50) {
8129 if (current_data_rate == 1) {
8130 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8133 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
/* Locate the PCIe capability blocks on both ends of the link. */
8136 bridge_pos = pci_pcie_cap(root);
8140 gpu_pos = pci_pcie_cap(rdev->pdev);
8144 if (mask & DRM_PCIE_SPEED_80) {
8145 /* re-try equalization if gen3 is not already enabled */
8146 if (current_data_rate != 2) {
8147 u16 bridge_cfg, gpu_cfg;
8148 u16 bridge_cfg2, gpu_cfg2;
8149 u32 max_lw, current_lw, tmp;
/* Save link control on both ends, then set HAWD (hw autonomous width
 * disable) while we retrain. */
8151 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8152 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8154 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8155 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8157 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8158 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
/* If the link trained narrower than what was detected, request an
 * upconfigure/renegotiation to the max width. */
8160 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8161 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8162 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8164 if (current_lw < max_lw) {
8165 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8166 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8167 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8168 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8169 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8170 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
/* Up to 10 retraining attempts; wait for pending transactions first. */
8174 for (i = 0; i < 10; i++) {
8176 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8177 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8180 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8181 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8183 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8184 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
/* Quiesce the link while the equalization settings are swapped. */
8186 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8187 tmp |= LC_SET_QUIESCE;
8188 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8190 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8192 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
/* Restore the saved HAWD bits on both ends of the link... */
8197 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8198 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8199 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8200 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8202 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8203 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8204 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8205 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
/* ...and the saved LNKCTL2 fields: bit 4 (Enter Compliance) and bits 9-11
 * (Transmit Margin) per the PCIe spec -- TODO confirm mask intent. */
8208 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8209 tmp16 &= ~((1 << 4) | (7 << 9));
8210 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8211 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8213 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8214 tmp16 &= ~((1 << 4) | (7 << 9));
8215 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8216 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8218 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8219 tmp &= ~LC_SET_QUIESCE;
8220 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8225 /* set the link speed */
8226 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8227 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8228 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
/* Target Link Speed in LNKCTL2: 1 = 2.5GT/s, 2 = 5GT/s, 3 = 8GT/s. */
8230 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8232 if (mask & DRM_PCIE_SPEED_80)
8233 tmp16 |= 3; /* gen3 */
8234 else if (mask & DRM_PCIE_SPEED_50)
8235 tmp16 |= 2; /* gen2 */
8237 tmp16 |= 1; /* gen1 */
8238 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
/* Kick off the speed change and wait (bounded) for hw to clear the bit. */
8240 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8241 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8242 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8244 for (i = 0; i < rdev->usec_timeout; i++) {
8245 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8246 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8252 static void cik_program_aspm(struct radeon_device *rdev)
8255 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8256 bool disable_clkreq = false;
8258 if (radeon_aspm == 0)
8261 /* XXX double check IGPs */
8262 if (rdev->flags & RADEON_IS_IGP)
8265 if (!(rdev->flags & RADEON_IS_PCIE))
8268 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8269 data &= ~LC_XMIT_N_FTS_MASK;
8270 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8272 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8274 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8275 data |= LC_GO_TO_RECOVERY;
8277 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8279 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8280 data |= P_IGNORE_EDB_ERR;
8282 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8284 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8285 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8286 data |= LC_PMI_TO_L1_DIS;
8288 data |= LC_L0S_INACTIVITY(7);
8291 data |= LC_L1_INACTIVITY(7);
8292 data &= ~LC_PMI_TO_L1_DIS;
8294 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8296 if (!disable_plloff_in_l1) {
8297 bool clk_req_support;
8299 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8300 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8301 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8303 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8305 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8306 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8307 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8309 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8311 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8312 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8313 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8315 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8317 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8318 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8319 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8321 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8323 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8324 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8325 data |= LC_DYN_LANES_PWR_STATE(3);
8327 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8329 if (!disable_clkreq) {
8330 struct pci_dev *root = rdev->pdev->bus->self;
8333 clk_req_support = false;
8334 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8335 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8336 clk_req_support = true;
8338 clk_req_support = false;
8341 if (clk_req_support) {
8342 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8343 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8345 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8347 orig = data = RREG32_SMC(THM_CLK_CNTL);
8348 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8349 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8351 WREG32_SMC(THM_CLK_CNTL, data);
8353 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8354 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8355 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8357 WREG32_SMC(MISC_CLK_CTRL, data);
8359 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8360 data &= ~BCLK_AS_XCLK;
8362 WREG32_SMC(CG_CLKPIN_CNTL, data);
8364 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8365 data &= ~FORCE_BIF_REFCLK_EN;
8367 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8369 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8370 data &= ~MPLL_CLKOUT_SEL_MASK;
8371 data |= MPLL_CLKOUT_SEL(4);
8373 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8378 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8381 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8382 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8384 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8387 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8388 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8389 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8390 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8391 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8392 data &= ~LC_L0S_INACTIVITY_MASK;
8394 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);