/* Source: drivers/gpu/drm/radeon/cik.c
 * (extracted at commit e8ec15dfe5f8d94dc242b7478fd1c9f1ee58ddd4,
 *  platform/adaptation/renesas_rcar/renesas_kernel.git)
 */
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
/* Firmware images requested at runtime for each supported CIK ASIC:
 * command-processor blobs (pfp/me/ce/mec), memory controller (mc),
 * RLC, SDMA and SMC firmware.  MODULE_FIRMWARE() records the names in
 * the module info so packaging tools can ship the right files.
 * NOTE(review): the APU variants (KAVERI/KABINI) list no mc/smc
 * images — presumably they have none; confirm against radeon_ucode.
 */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
64
/* Helpers shared with other radeon ASIC files (r600/evergreen/sumo/si/sdma) */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
/* Forward declarations for static functions defined later in this file */
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                          bool enable);
87
88 /* get temperature in millidegrees */
89 int ci_get_temp(struct radeon_device *rdev)
90 {
91         u32 temp;
92         int actual_temp = 0;
93
94         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
95                 CTF_TEMP_SHIFT;
96
97         if (temp & 0x200)
98                 actual_temp = 255;
99         else
100                 actual_temp = temp & 0x1ff;
101
102         actual_temp = actual_temp * 1000;
103
104         return actual_temp;
105 }
106
107 /* get temperature in millidegrees */
108 int kv_get_temp(struct radeon_device *rdev)
109 {
110         u32 temp;
111         int actual_temp = 0;
112
113         temp = RREG32_SMC(0xC0300E0C);
114
115         if (temp)
116                 actual_temp = (temp / 8) - 49;
117         else
118                 actual_temp = 0;
119
120         actual_temp = actual_temp * 1000;
121
122         return actual_temp;
123 }
124
125 /*
126  * Indirect registers accessor
127  */
/**
 * cik_pciep_rreg - read a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: indirect register offset to read
 *
 * Accesses go through an index/data register pair, so the whole
 * sequence is serialized with pciep_idx_lock (irqsave: usable from
 * any context).  Returns the 32-bit register value.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
        unsigned long flags;
        u32 r;

        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
        WREG32(PCIE_INDEX, reg);
        /* read back — presumably to post the index write before the
         * data access; keep the ordering exactly as is */
        (void)RREG32(PCIE_INDEX);
        r = RREG32(PCIE_DATA);
        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
        return r;
}
140
/**
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: indirect register offset to write
 * @v: 32-bit value to write
 *
 * Mirrors cik_pciep_rreg(): index/data pair access serialized with
 * pciep_idx_lock.  Both the index and the data writes are followed by
 * a readback — presumably to post them to the bus; do not reorder.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
        unsigned long flags;

        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        WREG32(PCIE_DATA, v);
        (void)RREG32(PCIE_DATA);
        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
152
/* RLC save/restore register list for Spectre (Kaveri) GFX.
 *
 * Layout, as inferred from the data — confirm against the RLC list
 * consumer before relying on it: each entry packs a selector in the
 * upper 16 bits and a register dword index (byte offset >> 2) in the
 * lower 16, and is followed by one 0x00000000 value slot.  The bare
 * small constants (0x3, 0x5) appear to introduce special sections;
 * the final 0x5 section lists registers with no value slots.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
599
/* RLC save/restore register list for Kalindi (Kabini) GFX.
 *
 * Same format as spectre_rlc_save_restore_register_list (selector in
 * the upper 16 bits, register dword index in the lower 16, each
 * followed by a 0x00000000 value slot; 0x3/0x5 appear to introduce
 * special sections — confirm against the RLC list consumer).  Kalindi
 * has fewer shader engines, so the per-SE broadcast entries only go
 * up to the 0x7e00 selector.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
924
/* Bonaire SPM golden settings: { register offset, mask, value } triples
 * consumed by radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
929
/* Bonaire common golden settings: { register offset, mask, value } triples
 * consumed by radeon_program_register_sequence().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
937
/* Bonaire golden register overrides: { register offset, mask, value }
 * triples of hardware-team-recommended init values, consumed by
 * radeon_program_register_sequence().
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
982
/* Bonaire clockgating (MGCG/CGCG) init sequence: { register offset, mask,
 * value } triples consumed by radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1068
/* Spectre (Kaveri) SPM golden settings: { register offset, mask, value }
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1073
/* Spectre (Kaveri) common golden settings: { register offset, mask, value }
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1081
1082 static const u32 spectre_golden_registers[] =
1083 {
1084         0x3c000, 0xffff1fff, 0x96940200,
1085         0x3c00c, 0xffff0001, 0xff000000,
1086         0x3c200, 0xfffc0fff, 0x00000100,
1087         0x6ed8, 0x00010101, 0x00010000,
1088         0x9834, 0xf00fffff, 0x00000400,
1089         0x9838, 0xfffffffc, 0x00020200,
1090         0x5bb0, 0x000000f0, 0x00000070,
1091         0x5bc0, 0xf0311fff, 0x80300000,
1092         0x98f8, 0x73773777, 0x12010001,
1093         0x9b7c, 0x00ff0000, 0x00fc0000,
1094         0x2f48, 0x73773777, 0x12010001,
1095         0x8a14, 0xf000003f, 0x00000007,
1096         0x8b24, 0xffffffff, 0x00ffffff,
1097         0x28350, 0x3f3f3fff, 0x00000082,
1098         0x28355, 0x0000003f, 0x00000000,
1099         0x3e78, 0x00000001, 0x00000002,
1100         0x913c, 0xffff03df, 0x00000004,
1101         0xc768, 0x00000008, 0x00000008,
1102         0x8c00, 0x000008ff, 0x00000800,
1103         0x9508, 0x00010000, 0x00010000,
1104         0xac0c, 0xffffffff, 0x54763210,
1105         0x214f8, 0x01ff01ff, 0x00000002,
1106         0x21498, 0x007ff800, 0x00200000,
1107         0x2015c, 0xffffffff, 0x00000f40,
1108         0x30934, 0xffffffff, 0x00000001
1109 };
1110
/* Spectre (Kaveri) clockgating (MGCG/CGCG) init sequence: { register
 * offset, mask, value } triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1201
/* Kalindi (Kabini) SPM golden settings: { register offset, mask, value }
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1206
/* Kalindi (Kabini) common golden settings: { register offset, mask, value }
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1214
/* Kalindi (Kabini) golden register overrides: { register offset, mask,
 * value } triples consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1248
/* Kalindi (Kabini) clockgating (MGCG/CGCG) init sequence: { register
 * offset, mask, value } triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1307
/* Hawaii SPM golden settings: { register offset, mask, value } triples
 * consumed by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1312
/* Hawaii common golden settings: { register offset, mask, value } triples
 * consumed by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1321
/* Hawaii golden register overrides: { register offset, mask, value }
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1361
/* Hawaii clockgating (MGCG/CGCG) init sequence: { register offset, mask,
 * value } triples consumed by radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1472
1473 static void cik_init_golden_registers(struct radeon_device *rdev)
1474 {
1475         switch (rdev->family) {
1476         case CHIP_BONAIRE:
1477                 radeon_program_register_sequence(rdev,
1478                                                  bonaire_mgcg_cgcg_init,
1479                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1480                 radeon_program_register_sequence(rdev,
1481                                                  bonaire_golden_registers,
1482                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1483                 radeon_program_register_sequence(rdev,
1484                                                  bonaire_golden_common_registers,
1485                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1486                 radeon_program_register_sequence(rdev,
1487                                                  bonaire_golden_spm_registers,
1488                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1489                 break;
1490         case CHIP_KABINI:
1491                 radeon_program_register_sequence(rdev,
1492                                                  kalindi_mgcg_cgcg_init,
1493                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1494                 radeon_program_register_sequence(rdev,
1495                                                  kalindi_golden_registers,
1496                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1497                 radeon_program_register_sequence(rdev,
1498                                                  kalindi_golden_common_registers,
1499                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1500                 radeon_program_register_sequence(rdev,
1501                                                  kalindi_golden_spm_registers,
1502                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1503                 break;
1504         case CHIP_KAVERI:
1505                 radeon_program_register_sequence(rdev,
1506                                                  spectre_mgcg_cgcg_init,
1507                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1508                 radeon_program_register_sequence(rdev,
1509                                                  spectre_golden_registers,
1510                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1511                 radeon_program_register_sequence(rdev,
1512                                                  spectre_golden_common_registers,
1513                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1514                 radeon_program_register_sequence(rdev,
1515                                                  spectre_golden_spm_registers,
1516                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1517                 break;
1518         case CHIP_HAWAII:
1519                 radeon_program_register_sequence(rdev,
1520                                                  hawaii_mgcg_cgcg_init,
1521                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1522                 radeon_program_register_sequence(rdev,
1523                                                  hawaii_golden_registers,
1524                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1525                 radeon_program_register_sequence(rdev,
1526                                                  hawaii_golden_common_registers,
1527                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1528                 radeon_program_register_sequence(rdev,
1529                                                  hawaii_golden_spm_registers,
1530                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1531                 break;
1532         default:
1533                 break;
1534         }
1535 }
1536
1537 /**
1538  * cik_get_xclk - get the xclk
1539  *
1540  * @rdev: radeon_device pointer
1541  *
1542  * Returns the reference clock used by the gfx engine
1543  * (CIK).
1544  */
1545 u32 cik_get_xclk(struct radeon_device *rdev)
1546 {
1547         u32 reference_clock = rdev->clock.spll.reference_freq;
1548
1549         if (rdev->flags & RADEON_IS_IGP) {
1550                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1551                         return reference_clock / 2;
1552         } else {
1553                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1554                         return reference_clock / 4;
1555         }
1556         return reference_clock;
1557 }
1558
1559 /**
1560  * cik_mm_rdoorbell - read a doorbell dword
1561  *
1562  * @rdev: radeon_device pointer
1563  * @index: doorbell index
1564  *
1565  * Returns the value in the doorbell aperture at the
1566  * requested doorbell index (CIK).
1567  */
1568 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1569 {
1570         if (index < rdev->doorbell.num_doorbells) {
1571                 return readl(rdev->doorbell.ptr + index);
1572         } else {
1573                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1574                 return 0;
1575         }
1576 }
1577
1578 /**
1579  * cik_mm_wdoorbell - write a doorbell dword
1580  *
1581  * @rdev: radeon_device pointer
1582  * @index: doorbell index
1583  * @v: value to write
1584  *
1585  * Writes @v to the doorbell aperture at the
1586  * requested doorbell index (CIK).
1587  */
1588 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1589 {
1590         if (index < rdev->doorbell.num_doorbells) {
1591                 writel(v, rdev->doorbell.ptr + index);
1592         } else {
1593                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1594         }
1595 }
1596
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO settings programmed around MC firmware load in
 * ci_mc_load_microcode().  Each entry appears to be an { index, value }
 * pair for the MC's indirect IO registers — NOTE(review): pair semantics
 * inferred from usage; confirm against ci_mc_load_microcode().
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1638
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO settings programmed around MC firmware load in
 * ci_mc_load_microcode().  Each entry appears to be an { index, value }
 * pair for the MC's indirect IO registers — NOTE(review): pair semantics
 * inferred from usage; confirm against ci_mc_load_microcode().
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1666
1667
1668 /**
1669  * cik_srbm_select - select specific register instances
1670  *
1671  * @rdev: radeon_device pointer
1672  * @me: selected ME (micro engine)
1673  * @pipe: pipe
1674  * @queue: queue
1675  * @vmid: VMID
1676  *
1677  * Switches the currently active registers instances.  Some
1678  * registers are instanced per VMID, others are instanced per
1679  * me/pipe/queue combination.
1680  */
1681 static void cik_srbm_select(struct radeon_device *rdev,
1682                             u32 me, u32 pipe, u32 queue, u32 vmid)
1683 {
1684         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1685                              MEID(me & 0x3) |
1686                              VMID(vmid & 0xf) |
1687                              QUEUEID(queue & 0x7));
1688         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1689 }
1690
1691 /* ucode loading */
1692 /**
1693  * ci_mc_load_microcode - load MC ucode into the hw
1694  *
1695  * @rdev: radeon_device pointer
1696  *
1697  * Load the GDDR MC ucode into the hw (CIK).
1698  * Returns 0 on success, error on failure.
1699  */
1700 int ci_mc_load_microcode(struct radeon_device *rdev)
1701 {
1702         const __be32 *fw_data;
1703         u32 running, blackout = 0;
1704         u32 *io_mc_regs;
1705         int i, ucode_size, regs_size;
1706
1707         if (!rdev->mc_fw)
1708                 return -EINVAL;
1709
1710         switch (rdev->family) {
1711         case CHIP_BONAIRE:
1712                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1713                 ucode_size = CIK_MC_UCODE_SIZE;
1714                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1715                 break;
1716         case CHIP_HAWAII:
1717                 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1718                 ucode_size = HAWAII_MC_UCODE_SIZE;
1719                 regs_size = HAWAII_IO_MC_REGS_SIZE;
1720                 break;
1721         default:
1722                 return -EINVAL;
1723         }
1724
1725         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1726
1727         if (running == 0) {
1728                 if (running) {
1729                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1730                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1731                 }
1732
1733                 /* reset the engine and set to writable */
1734                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1735                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1736
1737                 /* load mc io regs */
1738                 for (i = 0; i < regs_size; i++) {
1739                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1740                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1741                 }
1742                 /* load the MC ucode */
1743                 fw_data = (const __be32 *)rdev->mc_fw->data;
1744                 for (i = 0; i < ucode_size; i++)
1745                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1746
1747                 /* put the engine back into the active state */
1748                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1749                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1750                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1751
1752                 /* wait for training to complete */
1753                 for (i = 0; i < rdev->usec_timeout; i++) {
1754                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1755                                 break;
1756                         udelay(1);
1757                 }
1758                 for (i = 0; i < rdev->usec_timeout; i++) {
1759                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1760                                 break;
1761                         udelay(1);
1762                 }
1763
1764                 if (running)
1765                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1766         }
1767
1768         return 0;
1769 }
1770
1771 /**
1772  * cik_init_microcode - load ucode images from disk
1773  *
1774  * @rdev: radeon_device pointer
1775  *
1776  * Use the firmware interface to load the ucode images into
1777  * the driver (not loaded into hw).
1778  * Returns 0 on success, error on failure.
1779  */
1780 static int cik_init_microcode(struct radeon_device *rdev)
1781 {
1782         const char *chip_name;
1783         size_t pfp_req_size, me_req_size, ce_req_size,
1784                 mec_req_size, rlc_req_size, mc_req_size = 0,
1785                 sdma_req_size, smc_req_size = 0;
1786         char fw_name[30];
1787         int err;
1788
1789         DRM_DEBUG("\n");
1790
1791         switch (rdev->family) {
1792         case CHIP_BONAIRE:
1793                 chip_name = "BONAIRE";
1794                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1795                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1796                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1797                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1798                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1799                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1800                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1801                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1802                 break;
1803         case CHIP_HAWAII:
1804                 chip_name = "HAWAII";
1805                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1806                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1807                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1808                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1809                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1810                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1811                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1812                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1813                 break;
1814         case CHIP_KAVERI:
1815                 chip_name = "KAVERI";
1816                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1817                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1818                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1819                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1820                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1821                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1822                 break;
1823         case CHIP_KABINI:
1824                 chip_name = "KABINI";
1825                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1826                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1827                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1828                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1829                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1830                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1831                 break;
1832         default: BUG();
1833         }
1834
1835         DRM_INFO("Loading %s Microcode\n", chip_name);
1836
1837         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1838         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1839         if (err)
1840                 goto out;
1841         if (rdev->pfp_fw->size != pfp_req_size) {
1842                 printk(KERN_ERR
1843                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1844                        rdev->pfp_fw->size, fw_name);
1845                 err = -EINVAL;
1846                 goto out;
1847         }
1848
1849         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1850         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1851         if (err)
1852                 goto out;
1853         if (rdev->me_fw->size != me_req_size) {
1854                 printk(KERN_ERR
1855                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1856                        rdev->me_fw->size, fw_name);
1857                 err = -EINVAL;
1858         }
1859
1860         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1861         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1862         if (err)
1863                 goto out;
1864         if (rdev->ce_fw->size != ce_req_size) {
1865                 printk(KERN_ERR
1866                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1867                        rdev->ce_fw->size, fw_name);
1868                 err = -EINVAL;
1869         }
1870
1871         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1872         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1873         if (err)
1874                 goto out;
1875         if (rdev->mec_fw->size != mec_req_size) {
1876                 printk(KERN_ERR
1877                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1878                        rdev->mec_fw->size, fw_name);
1879                 err = -EINVAL;
1880         }
1881
1882         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1883         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1884         if (err)
1885                 goto out;
1886         if (rdev->rlc_fw->size != rlc_req_size) {
1887                 printk(KERN_ERR
1888                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1889                        rdev->rlc_fw->size, fw_name);
1890                 err = -EINVAL;
1891         }
1892
1893         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1894         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1895         if (err)
1896                 goto out;
1897         if (rdev->sdma_fw->size != sdma_req_size) {
1898                 printk(KERN_ERR
1899                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1900                        rdev->sdma_fw->size, fw_name);
1901                 err = -EINVAL;
1902         }
1903
1904         /* No SMC, MC ucode on APUs */
1905         if (!(rdev->flags & RADEON_IS_IGP)) {
1906                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1907                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1908                 if (err)
1909                         goto out;
1910                 if (rdev->mc_fw->size != mc_req_size) {
1911                         printk(KERN_ERR
1912                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1913                                rdev->mc_fw->size, fw_name);
1914                         err = -EINVAL;
1915                 }
1916
1917                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1918                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1919                 if (err) {
1920                         printk(KERN_ERR
1921                                "smc: error loading firmware \"%s\"\n",
1922                                fw_name);
1923                         release_firmware(rdev->smc_fw);
1924                         rdev->smc_fw = NULL;
1925                         err = 0;
1926                 } else if (rdev->smc_fw->size != smc_req_size) {
1927                         printk(KERN_ERR
1928                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1929                                rdev->smc_fw->size, fw_name);
1930                         err = -EINVAL;
1931                 }
1932         }
1933
1934 out:
1935         if (err) {
1936                 if (err != -EINVAL)
1937                         printk(KERN_ERR
1938                                "cik_cp: Failed to load firmware \"%s\"\n",
1939                                fw_name);
1940                 release_firmware(rdev->pfp_fw);
1941                 rdev->pfp_fw = NULL;
1942                 release_firmware(rdev->me_fw);
1943                 rdev->me_fw = NULL;
1944                 release_firmware(rdev->ce_fw);
1945                 rdev->ce_fw = NULL;
1946                 release_firmware(rdev->rlc_fw);
1947                 rdev->rlc_fw = NULL;
1948                 release_firmware(rdev->mc_fw);
1949                 rdev->mc_fw = NULL;
1950                 release_firmware(rdev->smc_fw);
1951                 rdev->smc_fw = NULL;
1952         }
1953         return err;
1954 }
1955
1956 /*
1957  * Core functions
1958  */
1959 /**
1960  * cik_tiling_mode_table_init - init the hw tiling table
1961  *
1962  * @rdev: radeon_device pointer
1963  *
1964  * Starting with SI, the tiling setup is done globally in a
1965  * set of 32 tiling modes.  Rather than selecting each set of
1966  * parameters per surface as on older asics, we just select
1967  * which index in the tiling table we want to use, and the
1968  * surface uses those parameters (CIK).
1969  */
1970 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1971 {
1972         const u32 num_tile_mode_states = 32;
1973         const u32 num_secondary_tile_mode_states = 16;
1974         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1975         u32 num_pipe_configs;
1976         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1977                 rdev->config.cik.max_shader_engines;
1978
1979         switch (rdev->config.cik.mem_row_size_in_kb) {
1980         case 1:
1981                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1982                 break;
1983         case 2:
1984         default:
1985                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1986                 break;
1987         case 4:
1988                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1989                 break;
1990         }
1991
1992         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1993         if (num_pipe_configs > 8)
1994                 num_pipe_configs = 16;
1995
1996         if (num_pipe_configs == 16) {
1997                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1998                         switch (reg_offset) {
1999                         case 0:
2000                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2003                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2004                                 break;
2005                         case 1:
2006                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2009                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2010                                 break;
2011                         case 2:
2012                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2016                                 break;
2017                         case 3:
2018                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2020                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2021                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2022                                 break;
2023                         case 4:
2024                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027                                                  TILE_SPLIT(split_equal_to_row_size));
2028                                 break;
2029                         case 5:
2030                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2031                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2032                                 break;
2033                         case 6:
2034                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2035                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2038                                 break;
2039                         case 7:
2040                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2042                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043                                                  TILE_SPLIT(split_equal_to_row_size));
2044                                 break;
2045                         case 8:
2046                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2047                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2048                                 break;
2049                         case 9:
2050                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2052                                 break;
2053                         case 10:
2054                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2055                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2057                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058                                 break;
2059                         case 11:
2060                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2063                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064                                 break;
2065                         case 12:
2066                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2067                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2068                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2069                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070                                 break;
2071                         case 13:
2072                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2074                                 break;
2075                         case 14:
2076                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                                 break;
2081                         case 16:
2082                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2084                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2085                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086                                 break;
2087                         case 17:
2088                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092                                 break;
2093                         case 27:
2094                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2096                                 break;
2097                         case 28:
2098                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102                                 break;
2103                         case 29:
2104                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2105                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2107                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2108                                 break;
2109                         case 30:
2110                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2111                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2112                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2113                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2114                                 break;
2115                         default:
2116                                 gb_tile_moden = 0;
2117                                 break;
2118                         }
2119                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2120                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2121                 }
2122                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2123                         switch (reg_offset) {
2124                         case 0:
2125                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2127                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2128                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2129                                 break;
2130                         case 1:
2131                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2134                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2135                                 break;
2136                         case 2:
2137                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2140                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2141                                 break;
2142                         case 3:
2143                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2146                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2147                                 break;
2148                         case 4:
2149                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2151                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2152                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2153                                 break;
2154                         case 5:
2155                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2157                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2158                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2159                                 break;
2160                         case 6:
2161                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2165                                 break;
2166                         case 8:
2167                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2171                                 break;
2172                         case 9:
2173                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2177                                 break;
2178                         case 10:
2179                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2183                                 break;
2184                         case 11:
2185                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2189                                 break;
2190                         case 12:
2191                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2194                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2195                                 break;
2196                         case 13:
2197                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2200                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2201                                 break;
2202                         case 14:
2203                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2207                                 break;
2208                         default:
2209                                 gb_tile_moden = 0;
2210                                 break;
2211                         }
2212                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2213                 }
2214         } else if (num_pipe_configs == 8) {
2215                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2216                         switch (reg_offset) {
2217                         case 0:
2218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2220                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2221                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2222                                 break;
2223                         case 1:
2224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2226                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2227                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2228                                 break;
2229                         case 2:
2230                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2232                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2233                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2234                                 break;
2235                         case 3:
2236                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2238                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2239                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2240                                 break;
2241                         case 4:
2242                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2244                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2245                                                  TILE_SPLIT(split_equal_to_row_size));
2246                                 break;
2247                         case 5:
2248                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250                                 break;
2251                         case 6:
2252                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2253                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2254                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2256                                 break;
2257                         case 7:
2258                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261                                                  TILE_SPLIT(split_equal_to_row_size));
2262                                 break;
2263                         case 8:
2264                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2265                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2266                                 break;
2267                         case 9:
2268                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2270                                 break;
2271                         case 10:
2272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2274                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276                                 break;
2277                         case 11:
2278                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2280                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282                                 break;
2283                         case 12:
2284                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2285                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288                                 break;
2289                         case 13:
2290                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2292                                 break;
2293                         case 14:
2294                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2296                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2297                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298                                 break;
2299                         case 16:
2300                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304                                 break;
2305                         case 17:
2306                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2307                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310                                 break;
2311                         case 27:
2312                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2314                                 break;
2315                         case 28:
2316                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320                                 break;
2321                         case 29:
2322                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326                                 break;
2327                         case 30:
2328                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2330                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332                                 break;
2333                         default:
2334                                 gb_tile_moden = 0;
2335                                 break;
2336                         }
2337                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2338                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2339                 }
2340                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2341                         switch (reg_offset) {
2342                         case 0:
2343                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2346                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2347                                 break;
2348                         case 1:
2349                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2353                                 break;
2354                         case 2:
2355                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2359                                 break;
2360                         case 3:
2361                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2365                                 break;
2366                         case 4:
2367                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2371                                 break;
2372                         case 5:
2373                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2375                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2376                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2377                                 break;
2378                         case 6:
2379                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2383                                 break;
2384                         case 8:
2385                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2387                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2389                                 break;
2390                         case 9:
2391                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2395                                 break;
2396                         case 10:
2397                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2399                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2401                                 break;
2402                         case 11:
2403                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2406                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2407                                 break;
2408                         case 12:
2409                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2411                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2412                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2413                                 break;
2414                         case 13:
2415                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2417                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2419                                 break;
2420                         case 14:
2421                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2425                                 break;
2426                         default:
2427                                 gb_tile_moden = 0;
2428                                 break;
2429                         }
2430                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2431                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2432                 }
2433         } else if (num_pipe_configs == 4) {
2434                 if (num_rbs == 4) {
2435                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2436                                 switch (reg_offset) {
2437                                 case 0:
2438                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2440                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2442                                         break;
2443                                 case 1:
2444                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2448                                         break;
2449                                 case 2:
2450                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2452                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2454                                         break;
2455                                 case 3:
2456                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2458                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2459                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2460                                         break;
2461                                 case 4:
2462                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2464                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465                                                          TILE_SPLIT(split_equal_to_row_size));
2466                                         break;
2467                                 case 5:
2468                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2469                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470                                         break;
2471                                 case 6:
2472                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2474                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2476                                         break;
2477                                 case 7:
2478                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2479                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2480                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481                                                          TILE_SPLIT(split_equal_to_row_size));
2482                                         break;
2483                                 case 8:
2484                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2485                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2486                                         break;
2487                                 case 9:
2488                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2490                                         break;
2491                                 case 10:
2492                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496                                         break;
2497                                 case 11:
2498                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2500                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502                                         break;
2503                                 case 12:
2504                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2505                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2506                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2507                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2508                                         break;
2509                                 case 13:
2510                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2512                                         break;
2513                                 case 14:
2514                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2516                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518                                         break;
2519                                 case 16:
2520                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524                                         break;
2525                                 case 17:
2526                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2527                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530                                         break;
2531                                 case 27:
2532                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2534                                         break;
2535                                 case 28:
2536                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2538                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                                         break;
2541                                 case 29:
2542                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546                                         break;
2547                                 case 30:
2548                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2550                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                                         break;
2553                                 default:
2554                                         gb_tile_moden = 0;
2555                                         break;
2556                                 }
2557                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2558                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2559                         }
2560                 } else if (num_rbs < 4) {
2561                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2562                                 switch (reg_offset) {
2563                                 case 0:
2564                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2568                                         break;
2569                                 case 1:
2570                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2572                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2574                                         break;
2575                                 case 2:
2576                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2579                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2580                                         break;
2581                                 case 3:
2582                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2584                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2586                                         break;
2587                                 case 4:
2588                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591                                                          TILE_SPLIT(split_equal_to_row_size));
2592                                         break;
2593                                 case 5:
2594                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2595                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596                                         break;
2597                                 case 6:
2598                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2600                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2602                                         break;
2603                                 case 7:
2604                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2605                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2606                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607                                                          TILE_SPLIT(split_equal_to_row_size));
2608                                         break;
2609                                 case 8:
2610                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2612                                         break;
2613                                 case 9:
2614                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2616                                         break;
2617                                 case 10:
2618                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622                                         break;
2623                                 case 11:
2624                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628                                         break;
2629                                 case 12:
2630                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634                                         break;
2635                                 case 13:
2636                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2638                                         break;
2639                                 case 14:
2640                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                                         break;
2645                                 case 16:
2646                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                                         break;
2651                                 case 17:
2652                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656                                         break;
2657                                 case 27:
2658                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2659                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2660                                         break;
2661                                 case 28:
2662                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2663                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2664                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666                                         break;
2667                                 case 29:
2668                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2670                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672                                         break;
2673                                 case 30:
2674                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2675                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678                                         break;
2679                                 default:
2680                                         gb_tile_moden = 0;
2681                                         break;
2682                                 }
2683                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2684                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2685                         }
2686                 }
2687                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2688                         switch (reg_offset) {
2689                         case 0:
2690                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2694                                 break;
2695                         case 1:
2696                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2700                                 break;
2701                         case 2:
2702                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2706                                 break;
2707                         case 3:
2708                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2712                                 break;
2713                         case 4:
2714                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2718                                 break;
2719                         case 5:
2720                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2724                                 break;
2725                         case 6:
2726                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2730                                 break;
2731                         case 8:
2732                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2733                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2734                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2736                                 break;
2737                         case 9:
2738                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2739                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2742                                 break;
2743                         case 10:
2744                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2747                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2748                                 break;
2749                         case 11:
2750                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2754                                 break;
2755                         case 12:
2756                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2760                                 break;
2761                         case 13:
2762                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2766                                 break;
2767                         case 14:
2768                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2771                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2772                                 break;
2773                         default:
2774                                 gb_tile_moden = 0;
2775                                 break;
2776                         }
2777                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2778                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2779                 }
2780         } else if (num_pipe_configs == 2) {
2781                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2782                         switch (reg_offset) {
2783                         case 0:
2784                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2787                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2788                                 break;
2789                         case 1:
2790                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2793                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2794                                 break;
2795                         case 2:
2796                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2799                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800                                 break;
2801                         case 3:
2802                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2805                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2806                                 break;
2807                         case 4:
2808                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2811                                                  TILE_SPLIT(split_equal_to_row_size));
2812                                 break;
2813                         case 5:
2814                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2816                                 break;
2817                         case 6:
2818                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2820                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2821                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2822                                 break;
2823                         case 7:
2824                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2826                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2827                                                  TILE_SPLIT(split_equal_to_row_size));
2828                                 break;
2829                         case 8:
2830                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2831                                 break;
2832                         case 9:
2833                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2835                                 break;
2836                         case 10:
2837                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2840                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841                                 break;
2842                         case 11:
2843                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2846                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847                                 break;
2848                         case 12:
2849                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2850                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2852                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853                                 break;
2854                         case 13:
2855                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2856                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2857                                 break;
2858                         case 14:
2859                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2862                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863                                 break;
2864                         case 16:
2865                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2868                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869                                 break;
2870                         case 17:
2871                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2872                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2873                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2874                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875                                 break;
2876                         case 27:
2877                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2878                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2879                                 break;
2880                         case 28:
2881                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2884                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885                                 break;
2886                         case 29:
2887                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2890                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891                                 break;
2892                         case 30:
2893                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2894                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2896                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897                                 break;
2898                         default:
2899                                 gb_tile_moden = 0;
2900                                 break;
2901                         }
2902                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2903                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2904                 }
2905                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2906                         switch (reg_offset) {
2907                         case 0:
2908                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2909                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2912                                 break;
2913                         case 1:
2914                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2915                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2916                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2918                                 break;
2919                         case 2:
2920                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2924                                 break;
2925                         case 3:
2926                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2930                                 break;
2931                         case 4:
2932                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2934                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2936                                 break;
2937                         case 5:
2938                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2942                                 break;
2943                         case 6:
2944                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2947                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2948                                 break;
2949                         case 8:
2950                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2951                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2952                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2954                                 break;
2955                         case 9:
2956                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2957                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2960                                 break;
2961                         case 10:
2962                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2963                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2966                                 break;
2967                         case 11:
2968                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2969                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2972                                 break;
2973                         case 12:
2974                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2976                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2978                                 break;
2979                         case 13:
2980                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2984                                 break;
2985                         case 14:
2986                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2988                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2989                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2990                                 break;
2991                         default:
2992                                 gb_tile_moden = 0;
2993                                 break;
2994                         }
2995                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2996                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2997                 }
2998         } else
2999                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3000 }
3001
3002 /**
3003  * cik_select_se_sh - select which SE, SH to address
3004  *
3005  * @rdev: radeon_device pointer
3006  * @se_num: shader engine to address
3007  * @sh_num: sh block to address
3008  *
3009  * Select which SE, SH combinations to address. Certain
3010  * registers are instanced per SE or SH.  0xffffffff means
3011  * broadcast to all SEs or SHs (CIK).
3012  */
3013 static void cik_select_se_sh(struct radeon_device *rdev,
3014                              u32 se_num, u32 sh_num)
3015 {
3016         u32 data = INSTANCE_BROADCAST_WRITES;
3017
3018         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3019                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3020         else if (se_num == 0xffffffff)
3021                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3022         else if (sh_num == 0xffffffff)
3023                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3024         else
3025                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3026         WREG32(GRBM_GFX_INDEX, data);
3027 }
3028
3029 /**
3030  * cik_create_bitmask - create a bitmask
3031  *
3032  * @bit_width: length of the mask
3033  *
3034  * create a variable length bit mask (CIK).
3035  * Returns the bitmask.
3036  */
3037 static u32 cik_create_bitmask(u32 bit_width)
3038 {
3039         u32 i, mask = 0;
3040
3041         for (i = 0; i < bit_width; i++) {
3042                 mask <<= 1;
3043                 mask |= 1;
3044         }
3045         return mask;
3046 }
3047
/**
 * cik_get_rb_disabled - compute the bitmask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK) for the currently
 * selected SE/SH (see cik_select_se_sh()).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 data, mask;

	/* Hardware-fused disable bits; only honored when bit 0 is set. */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* Merge in any driver/user-requested RB disables. */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* Mask to just the RBs belonging to one SH. */
	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
3078
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).  Builds a global bitmask of
 * disabled RBs, records the complementary enabled mask in
 * rdev->config.cik.backend_enable_mask, then programs
 * PA_SC_RASTER_CONFIG per SE accordingly.
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Gather per-SE/SH disabled-RB bits into one flat bitmask; the
	 * per-SH field width differs between Hawaii and the other CIK parts.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Restore broadcast addressing after the per-instance reads. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert into an enabled-RB mask over all SEs. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Program raster config per SE based on which RBs are enabled.
	 * Two bits of enabled_rbs are consumed per SH iteration.
	 */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* Leave the GRBM index in broadcast mode. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3147
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 * Must run before the CP rings are started; the register writes
 * below are order-dependent.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-ASIC shader engine / pipe / cache limits and the "golden"
	 * GB_ADDR_CONFIG value for each family
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri ships in several SKUs; the PCI device id selects
		 * the CU count and render backend count for this part.
		 */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			/* smallest Kaveri variant */
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP: clear 32 register groups at 0x2c14 + n*0x18 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	/* enable frame buffer reads/writes through the BIF */
	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but not consumed below;
	 * only mc_arb_ramcfg feeds the row-size and bank calculations.
	 */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (in KB) from the column count, clamped to 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* propagate the address config to every client that needs it
	 * (GB, HDP, DMIF, both SDMA engines, and the UVD decoder)
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	/* enable/route the render backends using the limits chosen above */
	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	/* read-modify-write the DB debug registers, preserving
	 * the bits outside each mask
	 */
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* program the scan converter FIFO sizes chosen per-ASIC above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* write back HDP_HOST_PATH_CNTL unchanged (read/write cycle) */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the register writes settle before the CP is started */
	udelay(50);
}
3411
/*
 * GPU scratch register helper functions.
 */
3415 /**
3416  * cik_scratch_init - setup driver info for CP scratch regs
3417  *
3418  * @rdev: radeon_device pointer
3419  *
3420  * Set up the number and offset of the CP scratch registers.
3421  * NOTE: use of CP scratch registers is a legacy inferface and
3422  * is not used by default on newer asics (r6xx+).  On newer asics,
3423  * memory buffers are used for fences rather than scratch regs.
3424  */
3425 static void cik_scratch_init(struct radeon_device *rdev)
3426 {
3427         int i;
3428
3429         rdev->scratch.num_reg = 7;
3430         rdev->scratch.reg_base = SCRATCH_REG0;
3431         for (i = 0; i < rdev->scratch.num_reg; i++) {
3432                 rdev->scratch.free[i] = true;
3433                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3434         }
3435 }
3436
/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg so we can tell when the CP has overwritten it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* 3-dword packet: have the CP write 0xDEADBEEF to the scratch reg */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	/* poll for the CP write-back, up to usec_timeout microseconds */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3488
/**
 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 *
 * Emits an hdp flush on the cp.
 * The caller must have already reserved enough ring space for the
 * 7-dword WAIT_REG_MEM packet emitted here.
 */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* pick the HDP flush req/done bit for this ring.  Note the
	 * unusual case order: compute rings (and anything unknown)
	 * share the first group; the gfx ring always uses CP0.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		/* compute: the bit depends on which MEC (me) and pipe */
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* no valid ME - nothing to flush */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* write GPU_HDP_FLUSH_REQ, then wait until GPU_HDP_FLUSH_DONE
	 * reports the matching bit set
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3533
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* fence address must be dword aligned; hi dword carries the
	 * data select (write seq) and int select (send int on write)
	 */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
}
3562
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.  Compute uses RELEASE_MEM rather than the gfx
 * EVENT_WRITE_EOP packet, with a different dword layout.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* data/int select precedes the address here, unlike the gfx packet */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
}
3592
3593 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3594                              struct radeon_ring *ring,
3595                              struct radeon_semaphore *semaphore,
3596                              bool emit_wait)
3597 {
3598         uint64_t addr = semaphore->gpu_addr;
3599         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3600
3601         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3602         radeon_ring_write(ring, addr & 0xffffffff);
3603         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3604
3605         return true;
3606 }
3607
3608 /**
3609  * cik_copy_cpdma - copy pages using the CP DMA engine
3610  *
3611  * @rdev: radeon_device pointer
3612  * @src_offset: src GPU address
3613  * @dst_offset: dst GPU address
3614  * @num_gpu_pages: number of GPU pages to xfer
3615  * @fence: radeon fence object
3616  *
3617  * Copy GPU paging using the CP DMA engine (CIK+).
3618  * Used by the radeon ttm implementation to move pages if
3619  * registered as the asic copy callback.
3620  */
3621 int cik_copy_cpdma(struct radeon_device *rdev,
3622                    uint64_t src_offset, uint64_t dst_offset,
3623                    unsigned num_gpu_pages,
3624                    struct radeon_fence **fence)
3625 {
3626         struct radeon_semaphore *sem = NULL;
3627         int ring_index = rdev->asic->copy.blit_ring_index;
3628         struct radeon_ring *ring = &rdev->ring[ring_index];
3629         u32 size_in_bytes, cur_size_in_bytes, control;
3630         int i, num_loops;
3631         int r = 0;
3632
3633         r = radeon_semaphore_create(rdev, &sem);
3634         if (r) {
3635                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3636                 return r;
3637         }
3638
3639         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3640         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3641         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3642         if (r) {
3643                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3644                 radeon_semaphore_free(rdev, &sem, NULL);
3645                 return r;
3646         }
3647
3648         radeon_semaphore_sync_to(sem, *fence);
3649         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3650
3651         for (i = 0; i < num_loops; i++) {
3652                 cur_size_in_bytes = size_in_bytes;
3653                 if (cur_size_in_bytes > 0x1fffff)
3654                         cur_size_in_bytes = 0x1fffff;
3655                 size_in_bytes -= cur_size_in_bytes;
3656                 control = 0;
3657                 if (size_in_bytes == 0)
3658                         control |= PACKET3_DMA_DATA_CP_SYNC;
3659                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3660                 radeon_ring_write(ring, control);
3661                 radeon_ring_write(ring, lower_32_bits(src_offset));
3662                 radeon_ring_write(ring, upper_32_bits(src_offset));
3663                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3664                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3665                 radeon_ring_write(ring, cur_size_in_bytes);
3666                 src_offset += cur_size_in_bytes;
3667                 dst_offset += cur_size_in_bytes;
3668         }
3669
3670         r = radeon_fence_emit(rdev, fence, ring->idx);
3671         if (r) {
3672                 radeon_ring_unlock_undo(rdev, ring);
3673                 return r;
3674         }
3675
3676         radeon_ring_unlock_commit(rdev, ring);
3677         radeon_semaphore_free(rdev, &sem, *fence);
3678
3679         return r;
3680 }
3681
/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits an DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* record where the rptr will be after this IB's 4-dword
		 * packet, either via a uconfig reg or the writeback buffer
		 */
		if (ring->rptr_save_reg) {
			/* 3 dwords for this packet + 4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this packet + 4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length plus the VM id (0 if no VM) in bits 31:24 */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3740
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg so we can tell when the IB has run */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* 3-dword IB: write 0xDEADBEEF to the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait (non-interruptibly) for the IB's fence to signal */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	/* poll for the IB's write-back, up to usec_timeout microseconds */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3806
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
3830 /**
3831  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3832  *
3833  * @rdev: radeon_device pointer
3834  * @enable: enable or disable the MEs
3835  *
3836  * Halts or unhalts the gfx MEs.
3837  */
3838 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3839 {
3840         if (enable)
3841                 WREG32(CP_ME_CNTL, 0);
3842         else {
3843                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3844                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3845         }
3846         udelay(50);
3847 }
3848
/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * The MEs are halted first; each ucode image is streamed into its
 * engine's RAM via an address/data register pair.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* halt the MEs before touching their ucode RAM */
	cik_cp_gfx_enable(rdev, false);

	/* PFP: reset write address, stream the image, reset again */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* leave all ucode address registers zeroed */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
3894
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear-state dwords plus 17 dwords of fixed init packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the default (golden) state from clearstate_ci.h */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3955
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx CP before freeing the ring object */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
3969
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA must be set while the pointers are reset */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* rewrite CNTL without RB_RPTR_WR_ENA to latch the new pointers */
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
4043
4044 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4045                      struct radeon_ring *ring)
4046 {
4047         u32 rptr;
4048
4049         if (rdev->wb.enabled)
4050                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4051         else
4052                 rptr = RREG32(CP_RB0_RPTR);
4053
4054         return rptr;
4055 }
4056
4057 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4058                      struct radeon_ring *ring)
4059 {
4060         u32 wptr;
4061
4062         wptr = RREG32(CP_RB0_WPTR);
4063
4064         return wptr;
4065 }
4066
/**
 * cik_gfx_set_wptr - commit the gfx ring write pointer to the hardware
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 *
 * Writes ring->wptr to CP_RB0_WPTR, then reads the register back
 * to flush the posted MMIO write.
 */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	(void)RREG32(CP_RB0_WPTR);
}
4073
/**
 * cik_compute_get_rptr - fetch the current compute queue read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 *
 * With writeback enabled the pointer is read from the writeback page.
 * Otherwise the queue's HQD registers must be selected via SRBM
 * (under srbm_mutex) before reading CP_HQD_PQ_RPTR, and the selection
 * reset to 0 afterwards.
 */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4091
/**
 * cik_compute_get_wptr - fetch the current compute queue write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 *
 * Same scheme as cik_compute_get_rptr(): writeback page when enabled,
 * otherwise an SRBM-selected read of CP_HQD_PQ_WPTR under srbm_mutex.
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4110
/**
 * cik_compute_set_wptr - commit the compute queue write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 *
 * Updates the writeback shadow first, then rings the queue's doorbell
 * to notify the MEC of the new write pointer.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4118
4119 /**
4120  * cik_cp_compute_enable - enable/disable the compute CP MEs
4121  *
4122  * @rdev: radeon_device pointer
4123  * @enable: enable or disable the MEs
4124  *
4125  * Halts or unhalts the compute MEs.
4126  */
4127 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4128 {
4129         if (enable)
4130                 WREG32(CP_MEC_CNTL, 0);
4131         else
4132                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4133         udelay(50);
4134 }
4135
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* MECs must be halted while the ucode RAM is written */
	cik_cp_compute_enable(rdev, false);

	/* MEC1: reset the ucode address, stream in the big-endian image,
	 * then reset the address again */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 (KV only) reuses the same firmware image as MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
4172
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* unhalt the MECs; per-queue setup happens in cik_cp_compute_resume() */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4187
/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	/* tear down the MQD BOs for both compute rings */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			/* NOTE(review): unpin/unreserve proceed even when the
			 * reserve above failed — confirm this is intentional
			 * (other radeon fini paths guard on r == 0). */
			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}
4221
/* cik_mec_fini - free the HPD EOP buffer object allocated by cik_mec_init() */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		/* NOTE(review): unpin/unreserve proceed even when the reserve
		 * failed — confirm this is intentional. */
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4237
4238 #define MEC_HPD_SIZE 2048
4239
/* cik_mec_init - allocate, pin, and clear the per-pipe HPD EOP buffer
 * used by the MEC compute microengines.
 * Returns 0 on success, negative error code on failure (buffer is torn
 * down via cik_mec_fini() on any partial failure).
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* one MEC_HPD_SIZE*2 slot per pipe; factor of 2 matches the
		 * per-pipe stride used in cik_cp_compute_resume() */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4295
/* CPU-side shadow of the per-queue HQD (hardware queue descriptor)
 * register block.  Field order mirrors the CP_HQD_*/CP_MQD_* register
 * layout programmed in cik_cp_compute_resume(). */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4334
/* Memory queue descriptor (MQD) layout for Bonaire-class compute
 * queues.  One MQD per compute ring is allocated in GTT and its GPU
 * address is handed to the CP via CP_MQD_BASE_ADDR in
 * cik_cp_compute_resume(); queue_state embeds the HQD register shadow. */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4362
4363 /**
4364  * cik_cp_compute_resume - setup the compute queue registers
4365  *
4366  * @rdev: radeon_device pointer
4367  *
4368  * Program the compute queues and test them to make sure they
4369  * are working.
4370  * Returns 0 for success, error for failure.
4371  */
4372 static int cik_cp_compute_resume(struct radeon_device *rdev)
4373 {
4374         int r, i, idx;
4375         u32 tmp;
4376         bool use_doorbell = true;
4377         u64 hqd_gpu_addr;
4378         u64 mqd_gpu_addr;
4379         u64 eop_gpu_addr;
4380         u64 wb_gpu_addr;
4381         u32 *buf;
4382         struct bonaire_mqd *mqd;
4383
4384         r = cik_cp_compute_start(rdev);
4385         if (r)
4386                 return r;
4387
4388         /* fix up chicken bits */
4389         tmp = RREG32(CP_CPF_DEBUG);
4390         tmp |= (1 << 23);
4391         WREG32(CP_CPF_DEBUG, tmp);
4392
4393         /* init the pipes */
4394         mutex_lock(&rdev->srbm_mutex);
4395         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4396                 int me = (i < 4) ? 1 : 2;
4397                 int pipe = (i < 4) ? i : (i - 4);
4398
4399                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4400
4401                 cik_srbm_select(rdev, me, pipe, 0, 0);
4402
4403                 /* write the EOP addr */
4404                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4405                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4406
4407                 /* set the VMID assigned */
4408                 WREG32(CP_HPD_EOP_VMID, 0);
4409
4410                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4411                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4412                 tmp &= ~EOP_SIZE_MASK;
4413                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4414                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4415         }
4416         cik_srbm_select(rdev, 0, 0, 0, 0);
4417         mutex_unlock(&rdev->srbm_mutex);
4418
4419         /* init the queues.  Just two for now. */
4420         for (i = 0; i < 2; i++) {
4421                 if (i == 0)
4422                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4423                 else
4424                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4425
4426                 if (rdev->ring[idx].mqd_obj == NULL) {
4427                         r = radeon_bo_create(rdev,
4428                                              sizeof(struct bonaire_mqd),
4429                                              PAGE_SIZE, true,
4430                                              RADEON_GEM_DOMAIN_GTT, NULL,
4431                                              &rdev->ring[idx].mqd_obj);
4432                         if (r) {
4433                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4434                                 return r;
4435                         }
4436                 }
4437
4438                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4439                 if (unlikely(r != 0)) {
4440                         cik_cp_compute_fini(rdev);
4441                         return r;
4442                 }
4443                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4444                                   &mqd_gpu_addr);
4445                 if (r) {
4446                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4447                         cik_cp_compute_fini(rdev);
4448                         return r;
4449                 }
4450                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4451                 if (r) {
4452                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4453                         cik_cp_compute_fini(rdev);
4454                         return r;
4455                 }
4456
4457                 /* init the mqd struct */
4458                 memset(buf, 0, sizeof(struct bonaire_mqd));
4459
4460                 mqd = (struct bonaire_mqd *)buf;
4461                 mqd->header = 0xC0310800;
4462                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4463                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4464                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4465                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4466
4467                 mutex_lock(&rdev->srbm_mutex);
4468                 cik_srbm_select(rdev, rdev->ring[idx].me,
4469                                 rdev->ring[idx].pipe,
4470                                 rdev->ring[idx].queue, 0);
4471
4472                 /* disable wptr polling */
4473                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4474                 tmp &= ~WPTR_POLL_EN;
4475                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4476
4477                 /* enable doorbell? */
4478                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4479                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4480                 if (use_doorbell)
4481                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4482                 else
4483                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4484                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4485                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4486
4487                 /* disable the queue if it's active */
4488                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4489                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4490                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4491                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4492                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4493                         for (i = 0; i < rdev->usec_timeout; i++) {
4494                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4495                                         break;
4496                                 udelay(1);
4497                         }
4498                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4499                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4500                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4501                 }
4502
4503                 /* set the pointer to the MQD */
4504                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4505                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4506                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4507                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4508                 /* set MQD vmid to 0 */
4509                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4510                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4511                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4512
4513                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4514                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4515                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4516                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4517                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4518                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4519
4520                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4521                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4522                 mqd->queue_state.cp_hqd_pq_control &=
4523                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4524
4525                 mqd->queue_state.cp_hqd_pq_control |=
4526                         order_base_2(rdev->ring[idx].ring_size / 8);
4527                 mqd->queue_state.cp_hqd_pq_control |=
4528                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4529 #ifdef __BIG_ENDIAN
4530                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4531 #endif
4532                 mqd->queue_state.cp_hqd_pq_control &=
4533                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4534                 mqd->queue_state.cp_hqd_pq_control |=
4535                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4536                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4537
4538                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4539                 if (i == 0)
4540                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4541                 else
4542                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4543                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4544                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4545                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4546                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4547                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4548
4549                 /* set the wb address wether it's enabled or not */
4550                 if (i == 0)
4551                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4552                 else
4553                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4554                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4555                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4556                         upper_32_bits(wb_gpu_addr) & 0xffff;
4557                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4558                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4559                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4560                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4561
4562                 /* enable the doorbell if requested */
4563                 if (use_doorbell) {
4564                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4565                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4566                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4567                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4568                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4569                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4570                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4571                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4572
4573                 } else {
4574                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4575                 }
4576                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4577                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4578
4579                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4580                 rdev->ring[idx].wptr = 0;
4581                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4582                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4583                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4584                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4585
4586                 /* set the vmid for the queue */
4587                 mqd->queue_state.cp_hqd_vmid = 0;
4588                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4589
4590                 /* activate the queue */
4591                 mqd->queue_state.cp_hqd_active = 1;
4592                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4593
4594                 cik_srbm_select(rdev, 0, 0, 0, 0);
4595                 mutex_unlock(&rdev->srbm_mutex);
4596
4597                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4598                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4599
4600                 rdev->ring[idx].ready = true;
4601                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4602                 if (r)
4603                         rdev->ring[idx].ready = false;
4604         }
4605
4606         return 0;
4607 }
4608
/* cik_cp_enable - enable/disable both the gfx CP and the compute MECs */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4614
/* cik_cp_load_microcode - load gfx then compute CP microcode.
 * Returns 0 on success, or the first failing loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4628
/* cik_cp_fini - tear down both the gfx ring and the compute queues */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4634
/* cik_cp_resume - bring up the CP blocks: load microcode, then resume
 * the gfx ring and the compute queues.  GUI idle interrupts are masked
 * for the duration of the bring-up.
 * Returns 0 on success, negative error code on failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4656
/* cik_print_gpu_status_regs - dump the GRBM/SRBM/SDMA/CP status
 * registers to the kernel log; used as a diagnostic aid around GPU
 * reset handling. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4696
4697 /**
4698  * cik_gpu_check_soft_reset - check which blocks are busy
4699  *
4700  * @rdev: radeon_device pointer
4701  *
4702  * Check which blocks are busy and return the relevant reset
4703  * mask to be used by cik_gpu_soft_reset().
4704  * Returns a mask of the blocks to be reset.
4705  */
4706 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4707 {
4708         u32 reset_mask = 0;
4709         u32 tmp;
4710
4711         /* GRBM_STATUS */
4712         tmp = RREG32(GRBM_STATUS);
4713         if (tmp & (PA_BUSY | SC_BUSY |
4714                    BCI_BUSY | SX_BUSY |
4715                    TA_BUSY | VGT_BUSY |
4716                    DB_BUSY | CB_BUSY |
4717                    GDS_BUSY | SPI_BUSY |
4718                    IA_BUSY | IA_BUSY_NO_DMA))
4719                 reset_mask |= RADEON_RESET_GFX;
4720
4721         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4722                 reset_mask |= RADEON_RESET_CP;
4723
4724         /* GRBM_STATUS2 */
4725         tmp = RREG32(GRBM_STATUS2);
4726         if (tmp & RLC_BUSY)
4727                 reset_mask |= RADEON_RESET_RLC;
4728
4729         /* SDMA0_STATUS_REG */
4730         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4731         if (!(tmp & SDMA_IDLE))
4732                 reset_mask |= RADEON_RESET_DMA;
4733
4734         /* SDMA1_STATUS_REG */
4735         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4736         if (!(tmp & SDMA_IDLE))
4737                 reset_mask |= RADEON_RESET_DMA1;
4738
4739         /* SRBM_STATUS2 */
4740         tmp = RREG32(SRBM_STATUS2);
4741         if (tmp & SDMA_BUSY)
4742                 reset_mask |= RADEON_RESET_DMA;
4743
4744         if (tmp & SDMA1_BUSY)
4745                 reset_mask |= RADEON_RESET_DMA1;
4746
4747         /* SRBM_STATUS */
4748         tmp = RREG32(SRBM_STATUS);
4749
4750         if (tmp & IH_BUSY)
4751                 reset_mask |= RADEON_RESET_IH;
4752
4753         if (tmp & SEM_BUSY)
4754                 reset_mask |= RADEON_RESET_SEM;
4755
4756         if (tmp & GRBM_RQ_PENDING)
4757                 reset_mask |= RADEON_RESET_GRBM;
4758
4759         if (tmp & VMC_BUSY)
4760                 reset_mask |= RADEON_RESET_VMC;
4761
4762         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4763                    MCC_BUSY | MCD_BUSY))
4764                 reset_mask |= RADEON_RESET_MC;
4765
4766         if (evergreen_is_display_hung(rdev))
4767                 reset_mask |= RADEON_RESET_DISPLAY;
4768
4769         /* Skip MC reset as it's mostly likely not hung, just busy */
4770         if (reset_mask & RADEON_RESET_MC) {
4771                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4772                 reset_mask &= ~RADEON_RESET_MC;
4773         }
4774
4775         return reset_mask;
4776 }
4777
4778 /**
4779  * cik_gpu_soft_reset - soft reset GPU
4780  *
4781  * @rdev: radeon_device pointer
4782  * @reset_mask: mask of which blocks to reset
4783  *
4784  * Soft reset the blocks specified in @reset_mask.
4785  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status and the most recent VM fault for the log */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG (clockgating/powergating) before touching reset bits */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt whichever SDMA engines are being reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce memory traffic before asserting the resets */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the hung-block mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* only discrete parts soft reset the MC; skipped on IGPs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert, settle, then deassert; the post-write reads force the
	 * writes to be posted before the delay */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* dump status again so before/after can be compared in the log */
	cik_print_gpu_status_regs(rdev);
}
4908
/* GMCON register state saved across a pci config reset on IGPs (KV/KB) */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
4914
/* Capture the GMCON state so kv_restore_regs_for_reset() can reinstate
 * it after the pci config reset, then disarm the register engine and
 * stutter control while the reset is in flight.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* clear the execute-on-power-up, execute-on-register-update and
	 * stutter enable bits for the duration of the reset */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
4926
4927 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4928                                       struct kv_reset_save_regs *save)
4929 {
4930         int i;
4931
4932         WREG32(GMCON_PGFSM_WRITE, 0);
4933         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4934
4935         for (i = 0; i < 5; i++)
4936                 WREG32(GMCON_PGFSM_WRITE, 0);
4937
4938         WREG32(GMCON_PGFSM_WRITE, 0);
4939         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4940
4941         for (i = 0; i < 5; i++)
4942                 WREG32(GMCON_PGFSM_WRITE, 0);
4943
4944         WREG32(GMCON_PGFSM_WRITE, 0x210000);
4945         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4946
4947         for (i = 0; i < 5; i++)
4948                 WREG32(GMCON_PGFSM_WRITE, 0);
4949
4950         WREG32(GMCON_PGFSM_WRITE, 0x21003);
4951         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4952
4953         for (i = 0; i < 5; i++)
4954                 WREG32(GMCON_PGFSM_WRITE, 0);
4955
4956         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4957         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4958
4959         for (i = 0; i < 5; i++)
4960                 WREG32(GMCON_PGFSM_WRITE, 0);
4961
4962         WREG32(GMCON_PGFSM_WRITE, 0);
4963         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
4964
4965         for (i = 0; i < 5; i++)
4966                 WREG32(GMCON_PGFSM_WRITE, 0);
4967
4968         WREG32(GMCON_PGFSM_WRITE, 0x420000);
4969         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
4970
4971         for (i = 0; i < 5; i++)
4972                 WREG32(GMCON_PGFSM_WRITE, 0);
4973
4974         WREG32(GMCON_PGFSM_WRITE, 0x120202);
4975         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
4976
4977         for (i = 0; i < 5; i++)
4978                 WREG32(GMCON_PGFSM_WRITE, 0);
4979
4980         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
4981         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
4982
4983         for (i = 0; i < 5; i++)
4984                 WREG32(GMCON_PGFSM_WRITE, 0);
4985
4986         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
4987         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
4988
4989         for (i = 0; i < 5; i++)
4990                 WREG32(GMCON_PGFSM_WRITE, 0);
4991
4992         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
4993         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
4994
4995         WREG32(GMCON_MISC3, save->gmcon_misc3);
4996         WREG32(GMCON_MISC, save->gmcon_misc);
4997         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
4998 }
4999
/**
 * cik_gpu_pci_config_reset - asic reset via pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce the CP, MEC and SDMA engines, stop the RLC and MC,
 * then reset the whole asic through the pci config space (CIK).
 * Used as an escalation when the soft reset does not recover
 * the hardware.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGPs need their GMCON state saved/restored around the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads back
	 * all-ones while the asic is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5062
5063 /**
5064  * cik_asic_reset - soft reset GPU
5065  *
5066  * @rdev: radeon_device pointer
5067  *
5068  * Look up which blocks are hung and attempt
5069  * to reset them.
5070  * Returns 0 for success.
5071  */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* flag the engine hung in the bios scratch regs while we reset */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	/* re-check: anything still busy needs the heavier hammer */
	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset (only if enabled via the module param) */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* clear the hung flag only if everything is idle again */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5097
5098 /**
5099  * cik_gfx_is_lockup - check if the 3D engine is locked up
5100  *
5101  * @rdev: radeon_device pointer
5102  * @ring: radeon_ring structure holding ring information
5103  *
5104  * Check if the 3D engine is locked up (CIK).
5105  * Returns true if the engine is locked, false if not.
5106  */
5107 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5108 {
5109         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5110
5111         if (!(reset_mask & (RADEON_RESET_GFX |
5112                             RADEON_RESET_COMPUTE |
5113                             RADEON_RESET_CP))) {
5114                 radeon_ring_lockup_update(ring);
5115                 return false;
5116         }
5117         /* force CP activities */
5118         radeon_ring_force_activity(rdev, ring);
5119         return radeon_ring_test_lockup(rdev, ring);
5120 }
5121
5122 /* MC */
5123 /**
5124  * cik_mc_program - program the GPU memory controller
5125  *
5126  * @rdev: radeon_device pointer
5127  *
5128  * Set the location of vram, gart, and AGP in the GPU's
5129  * physical address space (CIK).
5130  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP: zero 32 groups of five registers, 0x18 apart.
	 * NOTE(review): raw offsets 0x2c14..0x2c24 are undocumented here;
	 * carried over from earlier asics. */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* quiesce the MC before reprogramming the aperture */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration: system aperture covers vram, default
	 * (out-of-range) accesses go to the vram scratch page */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high 16 bits, start in the low 16,
	 * both in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* no AGP on CIK: base 0 with top below bottom disables the range */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5178
5179 /**
5180  * cik_mc_init - initialize the memory controller driver params
5181  *
5182  * @rdev: radeon_device pointer
5183  *
5184  * Look up the amount of vram, vram width, and decide how to place
5185  * vram and gart within the GPU's physical address space (CIK).
5186  * Returns 0 for success.
5187  */
5188 static int cik_mc_init(struct radeon_device *rdev)
5189 {
5190         u32 tmp;
5191         int chansize, numchan;
5192
5193         /* Get VRAM informations */
5194         rdev->mc.vram_is_ddr = true;
5195         tmp = RREG32(MC_ARB_RAMCFG);
5196         if (tmp & CHANSIZE_MASK) {
5197                 chansize = 64;
5198         } else {
5199                 chansize = 32;
5200         }
5201         tmp = RREG32(MC_SHARED_CHMAP);
5202         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5203         case 0:
5204         default:
5205                 numchan = 1;
5206                 break;
5207         case 1:
5208                 numchan = 2;
5209                 break;
5210         case 2:
5211                 numchan = 4;
5212                 break;
5213         case 3:
5214                 numchan = 8;
5215                 break;
5216         case 4:
5217                 numchan = 3;
5218                 break;
5219         case 5:
5220                 numchan = 6;
5221                 break;
5222         case 6:
5223                 numchan = 10;
5224                 break;
5225         case 7:
5226                 numchan = 12;
5227                 break;
5228         case 8:
5229                 numchan = 16;
5230                 break;
5231         }
5232         rdev->mc.vram_width = numchan * chansize;
5233         /* Could aper size report 0 ? */
5234         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5235         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5236         /* size in MB on si */
5237         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5238         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5239         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5240         si_vram_gtt_location(rdev, &rdev->mc);
5241         radeon_update_bandwidth_info(rdev);
5242
5243         return 0;
5244 }
5245
5246 /*
5247  * GART
5248  * VMID 0 is the physical GPU addresses as used by the kernel.
5249  * VMIDs 1-15 are used for userspace clients and are handled
5250  * by the radeon vm/hsa code.
5251  */
5252 /**
5253  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5254  *
5255  * @rdev: radeon_device pointer
5256  *
5257  * Flush the TLB for the VMID 0 page table (CIK).
5258  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache so pending writes land before the invalidate */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; writing 0x1 invalidates
	 * only the VMID 0 (kernel GART) TLB */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5267
5268 /**
5269  * cik_pcie_gart_enable - gart enable
5270  *
5271  * @rdev: radeon_device pointer
5272  *
5273  * This sets up the TLBs, programs the page tables for VMID0,
5274  * sets up the hw for VMIDs 1-15 which are allocated on
5275  * demand, and sets up the global locations for the LDS, GDS,
5276  * and GPUVM for FSA64 clients (CIK).
5277  * Returns 0 for success, errors for failure.
5278  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	/* pin the page table in vram so it has a stable GPU address */
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: the kernel's flat GART mapping over the GTT
	 * aperture, single level page table */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* out-of-range faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): raw register offsets, purpose undocumented here */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* point every VMID at the GART table for now; contexts 0-7 and
	 * 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 with full fault reporting enabled */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	/* Kaveri: make sure the memory hub does not bypass the VM */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		/* select the per-VMID SRBM register bank */
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	/* return to VMID 0's bank */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5403
5404 /**
5405  * cik_pcie_gart_disable - gart disable
5406  *
5407  * @rdev: radeon_device pointer
5408  *
 * This disables all VM page tables (CIK).
5410  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: L1 TLB left disabled (no ENABLE_L1_TLB) */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: caching disabled (no ENABLE_L2_CACHE) */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* safe to unpin the table now that the hw no longer walks it */
	radeon_gart_table_vram_unpin(rdev);
}
5431
5432 /**
5433  * cik_pcie_gart_fini - vm fini callback
5434  *
5435  * @rdev: radeon_device pointer
5436  *
5437  * Tears down the driver GART/VM setup (CIK).
5438  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* turn off the hw page tables first */
	cik_pcie_gart_disable(rdev);
	/* then release the table BO and the driver gart state */
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5445
5446 /* vm parser */
5447 /**
5448  * cik_ib_parse - vm ib_parse callback
5449  *
5450  * @rdev: radeon_device pointer
5451  * @ib: indirect buffer pointer
5452  *
5453  * CIK uses hw IB checking so this is a nop (CIK).
5454  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nothing to validate in software: CIK relies on hw IB checking */
	return 0;
}
5459
5460 /*
5461  * vm
5462  * VMID 0 is the physical GPU addresses as used by the kernel.
5463  * VMIDs 1-15 are used for userspace clients and are handled
5464  * by the radeon vm/hsa code.
5465  */
5466 /**
5467  * cik_vm_init - cik vm init callback
5468  *
5469  * @rdev: radeon_device pointer
5470  *
5471  * Inits cik specific vm parameters (number of VMs, base of vram for
5472  * VMIDs 1-15) (CIK).
5473  * Returns 0 for success.
5474  */
5475 int cik_vm_init(struct radeon_device *rdev)
5476 {
5477         /* number of VMs */
5478         rdev->vm_manager.nvm = 16;
5479         /* base offset of vram pages */
5480         if (rdev->flags & RADEON_IS_IGP) {
5481                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5482                 tmp <<= 22;
5483                 rdev->vm_manager.vram_base_offset = tmp;
5484         } else
5485                 rdev->vm_manager.vram_base_offset = 0;
5486
5487         return 0;
5488 }
5489
5490 /**
5491  * cik_vm_fini - cik vm fini callback
5492  *
5493  * @rdev: radeon_device pointer
5494  *
5495  * Tear down any asic specific VM setup (CIK).
5496  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* no asic specific VM state to tear down on CIK */
}
5500
5501 /**
5502  * cik_vm_decode_fault - print human readable fault info
5503  *
5504  * @rdev: radeon_device pointer
5505  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: memory client information (packed four character client name)
 *
5508  * Print human readable fault information (CIK).
5509  */
5510 static void cik_vm_decode_fault(struct radeon_device *rdev,
5511                                 u32 status, u32 addr, u32 mc_client)
5512 {
5513         u32 mc_id;
5514         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5515         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5516         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5517                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5518
5519         if (rdev->family == CHIP_HAWAII)
5520                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5521         else
5522                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5523
5524         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5525                protections, vmid, addr,
5526                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5527                block, mc_client, mc_id);
5528 }
5529
5530 /**
5531  * cik_vm_flush - cik vm flush using the CP
5532  *
5533  * @rdev: radeon_device pointer
5534  *
5535  * Update the page table base and flush the VM TLB
5536  * using the CP (CIK).
5537  */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* update this VMID's page directory base; VMIDs 0-7 and 8-15
	 * live in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* switch SRBM_GFX_CNTL to the target VMID's register bank */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	/* write the four consecutive SH_MEM registers in one packet */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch back to VMID 0's register bank */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ridx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5602
5603 /*
5604  * RLC
5605  * The RLC is a multi-purpose microengine that handles a
5606  * variety of functions, the most important of which is
5607  * the interrupt controller.
5608  */
5609 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5610                                           bool enable)
5611 {
5612         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5613
5614         if (enable)
5615                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5616         else
5617                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5618         WREG32(CP_INT_CNTL_RING0, tmp);
5619 }
5620
5621 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5622 {
5623         u32 tmp;
5624
5625         tmp = RREG32(RLC_LB_CNTL);
5626         if (enable)
5627                 tmp |= LOAD_BALANCE_ENABLE;
5628         else
5629                 tmp &= ~LOAD_BALANCE_ENABLE;
5630         WREG32(RLC_LB_CNTL, tmp);
5631 }
5632
5633 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5634 {
5635         u32 i, j, k;
5636         u32 mask;
5637
5638         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5639                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5640                         cik_select_se_sh(rdev, i, j);
5641                         for (k = 0; k < rdev->usec_timeout; k++) {
5642                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5643                                         break;
5644                                 udelay(1);
5645                         }
5646                 }
5647         }
5648         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5649
5650         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5651         for (k = 0; k < rdev->usec_timeout; k++) {
5652                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5653                         break;
5654                 udelay(1);
5655         }
5656 }
5657
5658 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5659 {
5660         u32 tmp;
5661
5662         tmp = RREG32(RLC_CNTL);
5663         if (tmp != rlc)
5664                 WREG32(RLC_CNTL, rlc);
5665 }
5666
5667 static u32 cik_halt_rlc(struct radeon_device *rdev)
5668 {
5669         u32 data, orig;
5670
5671         orig = data = RREG32(RLC_CNTL);
5672
5673         if (data & RLC_ENABLE) {
5674                 u32 i;
5675
5676                 data &= ~RLC_ENABLE;
5677                 WREG32(RLC_CNTL, data);
5678
5679                 for (i = 0; i < rdev->usec_timeout; i++) {
5680                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5681                                 break;
5682                         udelay(1);
5683                 }
5684
5685                 cik_wait_for_rlc_serdes(rdev);
5686         }
5687
5688         return orig;
5689 }
5690
5691 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5692 {
5693         u32 tmp, i, mask;
5694
5695         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5696         WREG32(RLC_GPR_REG2, tmp);
5697
5698         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5699         for (i = 0; i < rdev->usec_timeout; i++) {
5700                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5701                         break;
5702                 udelay(1);
5703         }
5704
5705         for (i = 0; i < rdev->usec_timeout; i++) {
5706                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5707                         break;
5708                 udelay(1);
5709         }
5710 }
5711
5712 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5713 {
5714         u32 tmp;
5715
5716         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5717         WREG32(RLC_GPR_REG2, tmp);
5718 }
5719
5720 /**
5721  * cik_rlc_stop - stop the RLC ME
5722  *
5723  * @rdev: radeon_device pointer
5724  *
5725  * Halt the RLC ME (MicroEngine) (CIK).
5726  */
5727 static void cik_rlc_stop(struct radeon_device *rdev)
5728 {
5729         WREG32(RLC_CNTL, 0);
5730
5731         cik_enable_gui_idle_interrupt(rdev, false);
5732
5733         cik_wait_for_rlc_serdes(rdev);
5734 }
5735
5736 /**
5737  * cik_rlc_start - start the RLC ME
5738  *
5739  * @rdev: radeon_device pointer
5740  *
5741  * Unhalt the RLC ME (MicroEngine) (CIK).
5742  */
5743 static void cik_rlc_start(struct radeon_device *rdev)
5744 {
5745         WREG32(RLC_CNTL, RLC_ENABLE);
5746
5747         cik_enable_gui_idle_interrupt(rdev, true);
5748
5749         udelay(50);
5750 }
5751
5752 /**
5753  * cik_rlc_resume - setup the RLC hw
5754  *
5755  * @rdev: radeon_device pointer
5756  *
5757  * Initialize the RLC registers, load the ucode,
5758  * and start the RLC (CIK).
5759  * Returns 0 for success, -EINVAL if the ucode is not available.
5760  */
5761 static int cik_rlc_resume(struct radeon_device *rdev)
5762 {
5763         u32 i, size, tmp;
5764         const __be32 *fw_data;
5765
5766         if (!rdev->rlc_fw)
5767                 return -EINVAL;
5768
5769         switch (rdev->family) {
5770         case CHIP_BONAIRE:
5771         case CHIP_HAWAII:
5772         default:
5773                 size = BONAIRE_RLC_UCODE_SIZE;
5774                 break;
5775         case CHIP_KAVERI:
5776                 size = KV_RLC_UCODE_SIZE;
5777                 break;
5778         case CHIP_KABINI:
5779                 size = KB_RLC_UCODE_SIZE;
5780                 break;
5781         }
5782
5783         cik_rlc_stop(rdev);
5784
5785         /* disable CG */
5786         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5787         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5788
5789         si_rlc_reset(rdev);
5790
5791         cik_init_pg(rdev);
5792
5793         cik_init_cg(rdev);
5794
5795         WREG32(RLC_LB_CNTR_INIT, 0);
5796         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5797
5798         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5799         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5800         WREG32(RLC_LB_PARAMS, 0x00600408);
5801         WREG32(RLC_LB_CNTL, 0x80000004);
5802
5803         WREG32(RLC_MC_CNTL, 0);
5804         WREG32(RLC_UCODE_CNTL, 0);
5805
5806         fw_data = (const __be32 *)rdev->rlc_fw->data;
5807                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5808         for (i = 0; i < size; i++)
5809                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5810         WREG32(RLC_GPM_UCODE_ADDR, 0);
5811
5812         /* XXX - find out what chips support lbpw */
5813         cik_enable_lbpw(rdev, false);
5814
5815         if (rdev->family == CHIP_BONAIRE)
5816                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5817
5818         cik_rlc_start(rdev);
5819
5820         return 0;
5821 }
5822
/* Enable/disable coarse-grain clock gating (CGCG/CGLS) on GFX (CIK). */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while the serdes write masks are set up */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the RLC enable state saved above */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads flush outstanding CB clock-gating state;
		 * results are intentionally discarded */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5858
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep / CGTS features on GFX (CIK). */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* enable CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming the serdes masks */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the saved RLC enable state */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			/* CGTS LS only when MGLS and CGTS_LS are supported */
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC memory light sleep if active */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP memory light sleep if active */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while reprogramming the serdes masks */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5937
/* Memory-controller registers that carry the MC_CG_ENABLE / MC_LS_ENABLE
 * bits toggled by cik_enable_mc_mgcg() and cik_enable_mc_ls(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5950
5951 static void cik_enable_mc_ls(struct radeon_device *rdev,
5952                              bool enable)
5953 {
5954         int i;
5955         u32 orig, data;
5956
5957         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5958                 orig = data = RREG32(mc_cg_registers[i]);
5959                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5960                         data |= MC_LS_ENABLE;
5961                 else
5962                         data &= ~MC_LS_ENABLE;
5963                 if (data != orig)
5964                         WREG32(mc_cg_registers[i], data);
5965         }
5966 }
5967
5968 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5969                                bool enable)
5970 {
5971         int i;
5972         u32 orig, data;
5973
5974         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5975                 orig = data = RREG32(mc_cg_registers[i]);
5976                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5977                         data |= MC_CG_ENABLE;
5978                 else
5979                         data &= ~MC_CG_ENABLE;
5980                 if (data != orig)
5981                         WREG32(mc_cg_registers[i], data);
5982         }
5983 }
5984
5985 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5986                                  bool enable)
5987 {
5988         u32 orig, data;
5989
5990         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5991                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5992                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5993         } else {
5994                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5995                 data |= 0xff000000;
5996                 if (data != orig)
5997                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5998
5999                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6000                 data |= 0xff000000;
6001                 if (data != orig)
6002                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6003         }
6004 }
6005
6006 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6007                                  bool enable)
6008 {
6009         u32 orig, data;
6010
6011         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6012                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6013                 data |= 0x100;
6014                 if (orig != data)
6015                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6016
6017                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6018                 data |= 0x100;
6019                 if (orig != data)
6020                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6021         } else {
6022                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6023                 data &= ~0x100;
6024                 if (orig != data)
6025                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6026
6027                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6028                 data &= ~0x100;
6029                 if (orig != data)
6030                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6031         }
6032 }
6033
/* Toggle UVD medium-grain clock gating (CIK). */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the read result is immediately discarded and
		 * the register forced to 0xfff; `data |= 0xfff` may have been
		 * intended (the disable path below uses `&= ~0xfff`). The
		 * read may also act as a posting read — confirm before
		 * changing. */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the low 12 mem-gating bits and the DCM enable */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6059
6060 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6061                                bool enable)
6062 {
6063         u32 orig, data;
6064
6065         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6066
6067         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6068                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6069                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6070         else
6071                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6072                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6073
6074         if (orig != data)
6075                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6076 }
6077
6078 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6079                                 bool enable)
6080 {
6081         u32 orig, data;
6082
6083         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6084
6085         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6086                 data &= ~CLOCK_GATING_DIS;
6087         else
6088                 data |= CLOCK_GATING_DIS;
6089
6090         if (orig != data)
6091                 WREG32(HDP_HOST_PATH_CNTL, data);
6092 }
6093
6094 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6095                               bool enable)
6096 {
6097         u32 orig, data;
6098
6099         orig = data = RREG32(HDP_MEM_POWER_LS);
6100
6101         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6102                 data |= HDP_LS_ENABLE;
6103         else
6104                 data &= ~HDP_LS_ENABLE;
6105
6106         if (orig != data)
6107                 WREG32(HDP_MEM_POWER_LS, data);
6108 }
6109
/* Enable or disable clock gating for the IP blocks selected in @block.
 * GFX gating wraps MGCG/CGCG in a gui-idle-interrupt off/on bracket, and
 * the MGCG/CGCG ordering differs between enable and disable. */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* IGPs have no discrete memory controller CG */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
6153
6154 static void cik_init_cg(struct radeon_device *rdev)
6155 {
6156
6157         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6158
6159         if (rdev->has_uvd)
6160                 si_init_uvd_internal_cg(rdev);
6161
6162         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6163                              RADEON_CG_BLOCK_SDMA |
6164                              RADEON_CG_BLOCK_BIF |
6165                              RADEON_CG_BLOCK_UVD |
6166                              RADEON_CG_BLOCK_HDP), true);
6167 }
6168
6169 static void cik_fini_cg(struct radeon_device *rdev)
6170 {
6171         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6172                              RADEON_CG_BLOCK_SDMA |
6173                              RADEON_CG_BLOCK_BIF |
6174                              RADEON_CG_BLOCK_UVD |
6175                              RADEON_CG_BLOCK_HDP), false);
6176
6177         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6178 }
6179
6180 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6181                                           bool enable)
6182 {
6183         u32 data, orig;
6184
6185         orig = data = RREG32(RLC_PG_CNTL);
6186         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6187                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6188         else
6189                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6190         if (orig != data)
6191                 WREG32(RLC_PG_CNTL, data);
6192 }
6193
6194 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6195                                           bool enable)
6196 {
6197         u32 data, orig;
6198
6199         orig = data = RREG32(RLC_PG_CNTL);
6200         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6201                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6202         else
6203                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6204         if (orig != data)
6205                 WREG32(RLC_PG_CNTL, data);
6206 }
6207
6208 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6209 {
6210         u32 data, orig;
6211
6212         orig = data = RREG32(RLC_PG_CNTL);
6213         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6214                 data &= ~DISABLE_CP_PG;
6215         else
6216                 data |= DISABLE_CP_PG;
6217         if (orig != data)
6218                 WREG32(RLC_PG_CNTL, data);
6219 }
6220
6221 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6222 {
6223         u32 data, orig;
6224
6225         orig = data = RREG32(RLC_PG_CNTL);
6226         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6227                 data &= ~DISABLE_GDS_PG;
6228         else
6229                 data |= DISABLE_GDS_PG;
6230         if (orig != data)
6231                 WREG32(RLC_PG_CNTL, data);
6232 }
6233
/* Per-ME save/restore table size (in dwords) and the offsets (in dwords)
 * into the CE/PFP/ME vs MEC firmware images where the tables live. */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the CP power-gating save/restore tables out of the CE, PFP, ME
 * and MEC firmware images into the RLC's cp_table buffer (CIK).
 * KAVERI carries one extra ME (5 instead of 4). The firmware words are
 * big-endian on disk and stored little-endian in the buffer. */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* pick the firmware image and table offset for this ME */
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			/* me >= 3: all remaining MEs come from the MEC fw */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
6275
/* Toggle GFX coarse-grain power gating and the RLC auto-PG engine (CIK). */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): read result is discarded — presumably a
		 * posting read to make sure PG is off before returning;
		 * confirm before removing. */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6305
6306 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6307 {
6308         u32 mask = 0, tmp, tmp1;
6309         int i;
6310
6311         cik_select_se_sh(rdev, se, sh);
6312         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6313         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6314         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6315
6316         tmp &= 0xffff0000;
6317
6318         tmp |= tmp1;
6319         tmp >>= 16;
6320
6321         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6322                 mask <<= 1;
6323                 mask |= 1;
6324         }
6325
6326         return (~tmp) & mask;
6327 }
6328
6329 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6330 {
6331         u32 i, j, k, active_cu_number = 0;
6332         u32 mask, counter, cu_bitmap;
6333         u32 tmp = 0;
6334
6335         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6336                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6337                         mask = 1;
6338                         cu_bitmap = 0;
6339                         counter = 0;
6340                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6341                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6342                                         if (counter < 2)
6343                                                 cu_bitmap |= mask;
6344                                         counter ++;
6345                                 }
6346                                 mask <<= 1;
6347                         }
6348
6349                         active_cu_number += counter;
6350                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6351                 }
6352         }
6353
6354         WREG32(RLC_PG_AO_CU_MASK, tmp);
6355
6356         tmp = RREG32(RLC_MAX_PG_CU);
6357         tmp &= ~MAX_PU_CU_MASK;
6358         tmp |= MAX_PU_CU(active_cu_number);
6359         WREG32(RLC_MAX_PG_CU, tmp);
6360 }
6361
6362 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6363                                        bool enable)
6364 {
6365         u32 data, orig;
6366
6367         orig = data = RREG32(RLC_PG_CNTL);
6368         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6369                 data |= STATIC_PER_CU_PG_ENABLE;
6370         else
6371                 data &= ~STATIC_PER_CU_PG_ENABLE;
6372         if (orig != data)
6373                 WREG32(RLC_PG_CNTL, data);
6374 }
6375
6376 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6377                                         bool enable)
6378 {
6379         u32 data, orig;
6380
6381         orig = data = RREG32(RLC_PG_CNTL);
6382         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6383                 data |= DYN_PER_CU_PG_ENABLE;
6384         else
6385                 data &= ~DYN_PER_CU_PG_ENABLE;
6386         if (orig != data)
6387                 WREG32(RLC_PG_CNTL, data);
6388 }
6389
/* Offsets (in dwords) into the RLC GPM scratch where the save/restore
 * list and the clear-state descriptor are written. */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Program the RLC scratch with the clear-state descriptor and register
 * save/restore list, point the RLC at the save/restore and cp-table
 * buffers, and set up the PG timing parameters (CIK). */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* descriptor: upper addr, lower addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* addresses are programmed in units of 256 bytes (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6441
/**
 * cik_update_gfx_pg - enable/disable gfx powergating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Toggle coarse grain (cgpg) and static/dynamic medium grain
 * (mgpg) gfx powergating together (CIK).
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6448
6449 u32 cik_get_csb_size(struct radeon_device *rdev)
6450 {
6451         u32 count = 0;
6452         const struct cs_section_def *sect = NULL;
6453         const struct cs_extent_def *ext = NULL;
6454
6455         if (rdev->rlc.cs_data == NULL)
6456                 return 0;
6457
6458         /* begin clear state */
6459         count += 2;
6460         /* context control state */
6461         count += 3;
6462
6463         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6464                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6465                         if (sect->id == SECT_CONTEXT)
6466                                 count += 2 + ext->reg_count;
6467                         else
6468                                 return 0;
6469                 }
6470         }
6471         /* pa_sc_raster_config/pa_sc_raster_config1 */
6472         count += 4;
6473         /* end clear state */
6474         count += 2;
6475         /* clear state */
6476         count += 2;
6477
6478         return count;
6479 }
6480
6481 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6482 {
6483         u32 count = 0, i;
6484         const struct cs_section_def *sect = NULL;
6485         const struct cs_extent_def *ext = NULL;
6486
6487         if (rdev->rlc.cs_data == NULL)
6488                 return;
6489         if (buffer == NULL)
6490                 return;
6491
6492         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6493         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6494
6495         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6496         buffer[count++] = cpu_to_le32(0x80000000);
6497         buffer[count++] = cpu_to_le32(0x80000000);
6498
6499         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6500                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6501                         if (sect->id == SECT_CONTEXT) {
6502                                 buffer[count++] =
6503                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6504                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6505                                 for (i = 0; i < ext->reg_count; i++)
6506                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6507                         } else {
6508                                 return;
6509                         }
6510                 }
6511         }
6512
6513         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6514         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6515         switch (rdev->family) {
6516         case CHIP_BONAIRE:
6517                 buffer[count++] = cpu_to_le32(0x16000012);
6518                 buffer[count++] = cpu_to_le32(0x00000000);
6519                 break;
6520         case CHIP_KAVERI:
6521                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6522                 buffer[count++] = cpu_to_le32(0x00000000);
6523                 break;
6524         case CHIP_KABINI:
6525                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6526                 buffer[count++] = cpu_to_le32(0x00000000);
6527                 break;
6528         case CHIP_HAWAII:
6529                 buffer[count++] = 0x3a00161a;
6530                 buffer[count++] = 0x0000002e;
6531                 break;
6532         default:
6533                 buffer[count++] = cpu_to_le32(0x00000000);
6534                 buffer[count++] = cpu_to_le32(0x00000000);
6535                 break;
6536         }
6537
6538         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6539         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6540
6541         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6542         buffer[count++] = cpu_to_le32(0);
6543 }
6544
/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * If any pg features are enabled: set up sclk slowdown on power
 * up/down, and - when gfx pg is supported - the RLC cgpg state
 * plus CP and GDS powergating.  Finally program the always-on CU
 * mask and enable gfx powergating (CIK).
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			/* cgpg setup must precede enabling CP/GDS pg */
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6559
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disable gfx powergating and, when gfx pg is supported, the CP
 * and GDS powergating that was enabled at init time (CIK).
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6570
6571 /*
6572  * Interrupts
6573  * Starting with r6xx, interrupts are handled via a ring buffer.
6574  * Ring buffers are areas of GPU accessible memory that the GPU
6575  * writes interrupt vectors into and the host reads vectors out of.
6576  * There is a rptr (read pointer) that determines where the
6577  * host is currently reading, and a wptr (write pointer)
6578  * which determines where the GPU has written.  When the
6579  * pointers are equal, the ring is idle.  When the GPU
6580  * writes vectors to the ring buffer, it increments the
6581  * wptr.  When there is an interrupt, the host then starts
6582  * fetching commands and processing them until the pointers are
6583  * equal again at which point it updates the rptr.
6584  */
6585
6586 /**
6587  * cik_enable_interrupts - Enable the interrupt ring buffer
6588  *
6589  * @rdev: radeon_device pointer
6590  *
6591  * Enable the interrupt ring buffer (CIK).
6592  */
6593 static void cik_enable_interrupts(struct radeon_device *rdev)
6594 {
6595         u32 ih_cntl = RREG32(IH_CNTL);
6596         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6597
6598         ih_cntl |= ENABLE_INTR;
6599         ih_rb_cntl |= IH_RB_ENABLE;
6600         WREG32(IH_CNTL, ih_cntl);
6601         WREG32(IH_RB_CNTL, ih_rb_cntl);
6602         rdev->ih.enabled = true;
6603 }
6604
6605 /**
6606  * cik_disable_interrupts - Disable the interrupt ring buffer
6607  *
6608  * @rdev: radeon_device pointer
6609  *
6610  * Disable the interrupt ring buffer (CIK).
6611  */
6612 static void cik_disable_interrupts(struct radeon_device *rdev)
6613 {
6614         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6615         u32 ih_cntl = RREG32(IH_CNTL);
6616
6617         ih_rb_cntl &= ~IH_RB_ENABLE;
6618         ih_cntl &= ~ENABLE_INTR;
6619         WREG32(IH_RB_CNTL, ih_rb_cntl);
6620         WREG32(IH_CNTL, ih_cntl);
6621         /* set rptr, wptr to 0 */
6622         WREG32(IH_RB_RPTR, 0);
6623         WREG32(IH_RB_WPTR, 0);
6624         rdev->ih.enabled = false;
6625         rdev->ih.rptr = 0;
6626 }
6627
6628 /**
6629  * cik_disable_interrupt_state - Disable all interrupt sources
6630  *
6631  * @rdev: radeon_device pointer
6632  *
6633  * Clear all interrupt enable bits used by the driver (CIK).
6634  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enables,
	 * clearing every other interrupt source
	 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: clear all pipe interrupt controls on ME1/ME2 */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. - mask per-crtc display interrupts */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep only the polarity bit, clearing the
	 * enable/ack bits on all six HPD pins
	 */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6689
6690 /**
6691  * cik_irq_init - init and enable the interrupt ring
6692  *
6693  * @rdev: radeon_device pointer
6694  *
6695  * Allocate a ring buffer for the interrupt controller,
6696  * enable the RLC, disable interrupts, enable the IH
6697  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6699  * Returns 0 for success, errors for failure.
6700  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* free the IH ring allocated above before bailing */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6771
6772 /**
6773  * cik_irq_set - enable/disable interrupt sources
6774  *
6775  * @rdev: radeon_device pointer
6776  *
6777  * Enable interrupt sources on the GPU (vblanks, hpd,
6778  * etc.) (CIK).
6779  * Returns 0 for success, errors for failure.
6780  */
6781 int cik_irq_set(struct radeon_device *rdev)
6782 {
6783         u32 cp_int_cntl;
6784         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6785         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6786         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6787         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6788         u32 grbm_int_cntl = 0;
6789         u32 dma_cntl, dma_cntl1;
6790         u32 thermal_int;
6791
6792         if (!rdev->irq.installed) {
6793                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6794                 return -EINVAL;
6795         }
6796         /* don't enable anything if the ih is disabled */
6797         if (!rdev->ih.enabled) {
6798                 cik_disable_interrupts(rdev);
6799                 /* force the active interrupt state to all disabled */
6800                 cik_disable_interrupt_state(rdev);
6801                 return 0;
6802         }
6803
6804         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6805                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6806         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6807
6808         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6809         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6810         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6811         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6812         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6813         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6814
6815         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6816         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6817
6818         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6819         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6820         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6821         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6822         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6823         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6824         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6825         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6826
6827         if (rdev->flags & RADEON_IS_IGP)
6828                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6829                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6830         else
6831                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6832                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6833
6834         /* enable CP interrupts on all rings */
6835         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6836                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6837                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6838         }
6839         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6840                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6841                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6842                 if (ring->me == 1) {
6843                         switch (ring->pipe) {
6844                         case 0:
6845                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6846                                 break;
6847                         case 1:
6848                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6849                                 break;
6850                         case 2:
6851                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6852                                 break;
6853                         case 3:
6854                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6855                                 break;
6856                         default:
6857                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6858                                 break;
6859                         }
6860                 } else if (ring->me == 2) {
6861                         switch (ring->pipe) {
6862                         case 0:
6863                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6864                                 break;
6865                         case 1:
6866                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6867                                 break;
6868                         case 2:
6869                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6870                                 break;
6871                         case 3:
6872                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6873                                 break;
6874                         default:
6875                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6876                                 break;
6877                         }
6878                 } else {
6879                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6880                 }
6881         }
6882         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6883                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6884                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6885                 if (ring->me == 1) {
6886                         switch (ring->pipe) {
6887                         case 0:
6888                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6889                                 break;
6890                         case 1:
6891                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6892                                 break;
6893                         case 2:
6894                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6895                                 break;
6896                         case 3:
6897                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6898                                 break;
6899                         default:
6900                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6901                                 break;
6902                         }
6903                 } else if (ring->me == 2) {
6904                         switch (ring->pipe) {
6905                         case 0:
6906                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6907                                 break;
6908                         case 1:
6909                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6910                                 break;
6911                         case 2:
6912                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6913                                 break;
6914                         case 3:
6915                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6916                                 break;
6917                         default:
6918                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6919                                 break;
6920                         }
6921                 } else {
6922                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6923                 }
6924         }
6925
6926         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6927                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6928                 dma_cntl |= TRAP_ENABLE;
6929         }
6930
6931         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6932                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6933                 dma_cntl1 |= TRAP_ENABLE;
6934         }
6935
6936         if (rdev->irq.crtc_vblank_int[0] ||
6937             atomic_read(&rdev->irq.pflip[0])) {
6938                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6939                 crtc1 |= VBLANK_INTERRUPT_MASK;
6940         }
6941         if (rdev->irq.crtc_vblank_int[1] ||
6942             atomic_read(&rdev->irq.pflip[1])) {
6943                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6944                 crtc2 |= VBLANK_INTERRUPT_MASK;
6945         }
6946         if (rdev->irq.crtc_vblank_int[2] ||
6947             atomic_read(&rdev->irq.pflip[2])) {
6948                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6949                 crtc3 |= VBLANK_INTERRUPT_MASK;
6950         }
6951         if (rdev->irq.crtc_vblank_int[3] ||
6952             atomic_read(&rdev->irq.pflip[3])) {
6953                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6954                 crtc4 |= VBLANK_INTERRUPT_MASK;
6955         }
6956         if (rdev->irq.crtc_vblank_int[4] ||
6957             atomic_read(&rdev->irq.pflip[4])) {
6958                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6959                 crtc5 |= VBLANK_INTERRUPT_MASK;
6960         }
6961         if (rdev->irq.crtc_vblank_int[5] ||
6962             atomic_read(&rdev->irq.pflip[5])) {
6963                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6964                 crtc6 |= VBLANK_INTERRUPT_MASK;
6965         }
6966         if (rdev->irq.hpd[0]) {
6967                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6968                 hpd1 |= DC_HPDx_INT_EN;
6969         }
6970         if (rdev->irq.hpd[1]) {
6971                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6972                 hpd2 |= DC_HPDx_INT_EN;
6973         }
6974         if (rdev->irq.hpd[2]) {
6975                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6976                 hpd3 |= DC_HPDx_INT_EN;
6977         }
6978         if (rdev->irq.hpd[3]) {
6979                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6980                 hpd4 |= DC_HPDx_INT_EN;
6981         }
6982         if (rdev->irq.hpd[4]) {
6983                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6984                 hpd5 |= DC_HPDx_INT_EN;
6985         }
6986         if (rdev->irq.hpd[5]) {
6987                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6988                 hpd6 |= DC_HPDx_INT_EN;
6989         }
6990
6991         if (rdev->irq.dpm_thermal) {
6992                 DRM_DEBUG("dpm thermal\n");
6993                 if (rdev->flags & RADEON_IS_IGP)
6994                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6995                 else
6996                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6997         }
6998
6999         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7000
7001         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7002         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7003
7004         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7005         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7006         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7007         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7008         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7009         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7010         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7011         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7012
7013         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7014
7015         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7016         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7017         if (rdev->num_crtc >= 4) {
7018                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7019                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7020         }
7021         if (rdev->num_crtc >= 6) {
7022                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7023                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7024         }
7025
7026         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7027         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7028         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7029         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7030         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7031         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7032
7033         if (rdev->flags & RADEON_IS_IGP)
7034                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7035         else
7036                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7037
7038         return 0;
7039 }
7040
7041 /**
7042  * cik_irq_ack - ack interrupt sources
7043  *
7044  * @rdev: radeon_device pointer
7045  *
7046  * Ack interrupt sources on the GPU (vblanks, hpd,
7047  * etc.) (CIK).  Certain interrupts sources are sw
7048  * generated and do not require an explicit ack.
7049  */
7050 static inline void cik_irq_ack(struct radeon_device *rdev)
7051 {
7052         u32 tmp;
7053
7054         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7055         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7056         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7057         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7058         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7059         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7060         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7061
7062         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7063                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7064         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7065                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7066         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7067                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7068         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7069                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7070
7071         if (rdev->num_crtc >= 4) {
7072                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7073                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7074                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7075                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7076                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7077                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7078                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7079                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7080         }
7081
7082         if (rdev->num_crtc >= 6) {
7083                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7084                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7085                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7086                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7087                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7088                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7089                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7090                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7091         }
7092
7093         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7094                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7095                 tmp |= DC_HPDx_INT_ACK;
7096                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7097         }
7098         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7099                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7100                 tmp |= DC_HPDx_INT_ACK;
7101                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7102         }
7103         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7104                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7105                 tmp |= DC_HPDx_INT_ACK;
7106                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7107         }
7108         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7109                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7110                 tmp |= DC_HPDx_INT_ACK;
7111                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7112         }
7113         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7114                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7115                 tmp |= DC_HPDx_INT_ACK;
7116                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7117         }
7118         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7119                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7120                 tmp |= DC_HPDx_INT_ACK;
7121                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7122         }
7123 }
7124
7125 /**
7126  * cik_irq_disable - disable interrupts
7127  *
7128  * @rdev: radeon_device pointer
7129  *
7130  * Disable interrupts on the hw (CIK).
7131  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* leave every interrupt source masked */
	cik_disable_interrupt_state(rdev);
}
7140
7141 /**
 * cik_irq_suspend - disable interrupts for suspend
7143  *
7144  * @rdev: radeon_device pointer
7145  *
7146  * Disable interrupts and stop the RLC (CIK).
7147  * Used for suspend.
7148  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	/* also stop the RLC for suspend */
	cik_rlc_stop(rdev);
}
7154
7155 /**
7156  * cik_irq_fini - tear down interrupt support
7157  *
7158  * @rdev: radeon_device pointer
7159  *
7160  * Disable interrupts on the hw and free the IH ring
7161  * buffer (CIK).
7162  * Used for driver unload.
7163  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	/* free the IH ring buffer */
	r600_ih_ring_fini(rdev);
}
7169
7170 /**
7171  * cik_get_ih_wptr - get the IH ring buffer wptr
7172  *
7173  * @rdev: radeon_device pointer
7174  *
7175  * Get the IH ring buffer wptr from either the register
7176  * or the writeback memory buffer (CIK).  Also check for
7177  * ring buffer overflow and deal with it.
7178  * Used by cik_irq_process().
7179  * Returns the value of the wptr.
7180  */
7181 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7182 {
7183         u32 wptr, tmp;
7184
7185         if (rdev->wb.enabled)
7186                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7187         else
7188                 wptr = RREG32(IH_RB_WPTR);
7189
7190         if (wptr & RB_OVERFLOW) {
7191                 /* When a ring buffer overflow happen start parsing interrupt
7192                  * from the last not overwritten vector (wptr + 16). Hopefully
7193                  * this should allow us to catchup.
7194                  */
7195                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7196                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7197                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7198                 tmp = RREG32(IH_RB_CNTL);
7199                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7200                 WREG32(IH_RB_CNTL, tmp);
7201         }
7202         return (wptr & rdev->ih.ptr_mask);
7203 }
7204
7205 /*        CIK IV Ring
7206  * Each IV ring entry is 128 bits:
7207  * [7:0]    - interrupt source id
7208  * [31:8]   - reserved
7209  * [59:32]  - interrupt source data
7210  * [63:60]  - reserved
7211  * [71:64]  - RINGID
7212  *            CP:
7213  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7214  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7215  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7216  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7217  *            PIPE_ID - ME0 0=3D
7218  *                    - ME1&2 compute dispatcher (4 pipes each)
7219  *            SDMA:
7220  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7221  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7222  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7223  * [79:72]  - VMID
7224  * [95:80]  - PASID
7225  * [127:96] - reserved
7226  */
7227 /**
7228  * cik_irq_process - interrupt handler
7229  *
7230  * @rdev: radeon_device pointer
7231  *
7232  * Interrupt hander (CIK).  Walk the IH ring,
7233  * ack interrupts and schedule work to handle
7234  * interrupt events.
7235  * Returns irq process return code.
7236  */
7237 int cik_irq_process(struct radeon_device *rdev)
7238 {
7239         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7240         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7241         u32 wptr;
7242         u32 rptr;
7243         u32 src_id, src_data, ring_id;
7244         u8 me_id, pipe_id, queue_id;
7245         u32 ring_index;
7246         bool queue_hotplug = false;
7247         bool queue_reset = false;
7248         u32 addr, status, mc_client;
7249         bool queue_thermal = false;
7250
7251         if (!rdev->ih.enabled || rdev->shutdown)
7252                 return IRQ_NONE;
7253
7254         wptr = cik_get_ih_wptr(rdev);
7255
7256 restart_ih:
7257         /* is somebody else already processing irqs? */
7258         if (atomic_xchg(&rdev->ih.lock, 1))
7259                 return IRQ_NONE;
7260
7261         rptr = rdev->ih.rptr;
7262         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7263
7264         /* Order reading of wptr vs. reading of IH ring data */
7265         rmb();
7266
7267         /* display interrupts */
7268         cik_irq_ack(rdev);
7269
7270         while (rptr != wptr) {
7271                 /* wptr/rptr are in bytes! */
7272                 ring_index = rptr / 4;
7273                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7274                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7275                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7276
7277                 switch (src_id) {
7278                 case 1: /* D1 vblank/vline */
7279                         switch (src_data) {
7280                         case 0: /* D1 vblank */
7281                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7282                                         if (rdev->irq.crtc_vblank_int[0]) {
7283                                                 drm_handle_vblank(rdev->ddev, 0);
7284                                                 rdev->pm.vblank_sync = true;
7285                                                 wake_up(&rdev->irq.vblank_queue);
7286                                         }
7287                                         if (atomic_read(&rdev->irq.pflip[0]))
7288                                                 radeon_crtc_handle_flip(rdev, 0);
7289                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7290                                         DRM_DEBUG("IH: D1 vblank\n");
7291                                 }
7292                                 break;
7293                         case 1: /* D1 vline */
7294                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7295                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7296                                         DRM_DEBUG("IH: D1 vline\n");
7297                                 }
7298                                 break;
7299                         default:
7300                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7301                                 break;
7302                         }
7303                         break;
7304                 case 2: /* D2 vblank/vline */
7305                         switch (src_data) {
7306                         case 0: /* D2 vblank */
7307                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7308                                         if (rdev->irq.crtc_vblank_int[1]) {
7309                                                 drm_handle_vblank(rdev->ddev, 1);
7310                                                 rdev->pm.vblank_sync = true;
7311                                                 wake_up(&rdev->irq.vblank_queue);
7312                                         }
7313                                         if (atomic_read(&rdev->irq.pflip[1]))
7314                                                 radeon_crtc_handle_flip(rdev, 1);
7315                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7316                                         DRM_DEBUG("IH: D2 vblank\n");
7317                                 }
7318                                 break;
7319                         case 1: /* D2 vline */
7320                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7321                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7322                                         DRM_DEBUG("IH: D2 vline\n");
7323                                 }
7324                                 break;
7325                         default:
7326                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7327                                 break;
7328                         }
7329                         break;
7330                 case 3: /* D3 vblank/vline */
7331                         switch (src_data) {
7332                         case 0: /* D3 vblank */
7333                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7334                                         if (rdev->irq.crtc_vblank_int[2]) {
7335                                                 drm_handle_vblank(rdev->ddev, 2);
7336                                                 rdev->pm.vblank_sync = true;
7337                                                 wake_up(&rdev->irq.vblank_queue);
7338                                         }
7339                                         if (atomic_read(&rdev->irq.pflip[2]))
7340                                                 radeon_crtc_handle_flip(rdev, 2);
7341                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7342                                         DRM_DEBUG("IH: D3 vblank\n");
7343                                 }
7344                                 break;
7345                         case 1: /* D3 vline */
7346                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7347                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7348                                         DRM_DEBUG("IH: D3 vline\n");
7349                                 }
7350                                 break;
7351                         default:
7352                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7353                                 break;
7354                         }
7355                         break;
7356                 case 4: /* D4 vblank/vline */
7357                         switch (src_data) {
7358                         case 0: /* D4 vblank */
7359                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7360                                         if (rdev->irq.crtc_vblank_int[3]) {
7361                                                 drm_handle_vblank(rdev->ddev, 3);
7362                                                 rdev->pm.vblank_sync = true;
7363                                                 wake_up(&rdev->irq.vblank_queue);
7364                                         }
7365                                         if (atomic_read(&rdev->irq.pflip[3]))
7366                                                 radeon_crtc_handle_flip(rdev, 3);
7367                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7368                                         DRM_DEBUG("IH: D4 vblank\n");
7369                                 }
7370                                 break;
7371                         case 1: /* D4 vline */
7372                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7373                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7374                                         DRM_DEBUG("IH: D4 vline\n");
7375                                 }
7376                                 break;
7377                         default:
7378                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7379                                 break;
7380                         }
7381                         break;
7382                 case 5: /* D5 vblank/vline */
7383                         switch (src_data) {
7384                         case 0: /* D5 vblank */
7385                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7386                                         if (rdev->irq.crtc_vblank_int[4]) {
7387                                                 drm_handle_vblank(rdev->ddev, 4);
7388                                                 rdev->pm.vblank_sync = true;
7389                                                 wake_up(&rdev->irq.vblank_queue);
7390                                         }
7391                                         if (atomic_read(&rdev->irq.pflip[4]))
7392                                                 radeon_crtc_handle_flip(rdev, 4);
7393                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7394                                         DRM_DEBUG("IH: D5 vblank\n");
7395                                 }
7396                                 break;
7397                         case 1: /* D5 vline */
7398                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7399                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7400                                         DRM_DEBUG("IH: D5 vline\n");
7401                                 }
7402                                 break;
7403                         default:
7404                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7405                                 break;
7406                         }
7407                         break;
7408                 case 6: /* D6 vblank/vline */
7409                         switch (src_data) {
7410                         case 0: /* D6 vblank */
7411                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7412                                         if (rdev->irq.crtc_vblank_int[5]) {
7413                                                 drm_handle_vblank(rdev->ddev, 5);
7414                                                 rdev->pm.vblank_sync = true;
7415                                                 wake_up(&rdev->irq.vblank_queue);
7416                                         }
7417                                         if (atomic_read(&rdev->irq.pflip[5]))
7418                                                 radeon_crtc_handle_flip(rdev, 5);
7419                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7420                                         DRM_DEBUG("IH: D6 vblank\n");
7421                                 }
7422                                 break;
7423                         case 1: /* D6 vline */
7424                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7425                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7426                                         DRM_DEBUG("IH: D6 vline\n");
7427                                 }
7428                                 break;
7429                         default:
7430                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7431                                 break;
7432                         }
7433                         break;
7434                 case 42: /* HPD hotplug */
7435                         switch (src_data) {
7436                         case 0:
7437                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7438                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7439                                         queue_hotplug = true;
7440                                         DRM_DEBUG("IH: HPD1\n");
7441                                 }
7442                                 break;
7443                         case 1:
7444                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7445                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7446                                         queue_hotplug = true;
7447                                         DRM_DEBUG("IH: HPD2\n");
7448                                 }
7449                                 break;
7450                         case 2:
7451                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7452                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7453                                         queue_hotplug = true;
7454                                         DRM_DEBUG("IH: HPD3\n");
7455                                 }
7456                                 break;
7457                         case 3:
7458                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7459                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7460                                         queue_hotplug = true;
7461                                         DRM_DEBUG("IH: HPD4\n");
7462                                 }
7463                                 break;
7464                         case 4:
7465                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7466                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7467                                         queue_hotplug = true;
7468                                         DRM_DEBUG("IH: HPD5\n");
7469                                 }
7470                                 break;
7471                         case 5:
7472                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7473                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7474                                         queue_hotplug = true;
7475                                         DRM_DEBUG("IH: HPD6\n");
7476                                 }
7477                                 break;
7478                         default:
7479                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7480                                 break;
7481                         }
7482                         break;
7483                 case 124: /* UVD */
7484                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7485                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7486                         break;
7487                 case 146:
7488                 case 147:
7489                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7490                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7491                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7492                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7493                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7494                                 addr);
7495                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7496                                 status);
7497                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7498                         /* reset addr and status */
7499                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7500                         break;
7501                 case 176: /* GFX RB CP_INT */
7502                 case 177: /* GFX IB CP_INT */
7503                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7504                         break;
7505                 case 181: /* CP EOP event */
7506                         DRM_DEBUG("IH: CP EOP\n");
7507                         /* XXX check the bitfield order! */
7508                         me_id = (ring_id & 0x60) >> 5;
7509                         pipe_id = (ring_id & 0x18) >> 3;
7510                         queue_id = (ring_id & 0x7) >> 0;
7511                         switch (me_id) {
7512                         case 0:
7513                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7514                                 break;
7515                         case 1:
7516                         case 2:
7517                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7518                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7519                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7520                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7521                                 break;
7522                         }
7523                         break;
7524                 case 184: /* CP Privileged reg access */
7525                         DRM_ERROR("Illegal register access in command stream\n");
7526                         /* XXX check the bitfield order! */
7527                         me_id = (ring_id & 0x60) >> 5;
7528                         pipe_id = (ring_id & 0x18) >> 3;
7529                         queue_id = (ring_id & 0x7) >> 0;
7530                         switch (me_id) {
7531                         case 0:
7532                                 /* This results in a full GPU reset, but all we need to do is soft
7533                                  * reset the CP for gfx
7534                                  */
7535                                 queue_reset = true;
7536                                 break;
7537                         case 1:
7538                                 /* XXX compute */
7539                                 queue_reset = true;
7540                                 break;
7541                         case 2:
7542                                 /* XXX compute */
7543                                 queue_reset = true;
7544                                 break;
7545                         }
7546                         break;
7547                 case 185: /* CP Privileged inst */
7548                         DRM_ERROR("Illegal instruction in command stream\n");
7549                         /* XXX check the bitfield order! */
7550                         me_id = (ring_id & 0x60) >> 5;
7551                         pipe_id = (ring_id & 0x18) >> 3;
7552                         queue_id = (ring_id & 0x7) >> 0;
7553                         switch (me_id) {
7554                         case 0:
7555                                 /* This results in a full GPU reset, but all we need to do is soft
7556                                  * reset the CP for gfx
7557                                  */
7558                                 queue_reset = true;
7559                                 break;
7560                         case 1:
7561                                 /* XXX compute */
7562                                 queue_reset = true;
7563                                 break;
7564                         case 2:
7565                                 /* XXX compute */
7566                                 queue_reset = true;
7567                                 break;
7568                         }
7569                         break;
7570                 case 224: /* SDMA trap event */
7571                         /* XXX check the bitfield order! */
7572                         me_id = (ring_id & 0x3) >> 0;
7573                         queue_id = (ring_id & 0xc) >> 2;
7574                         DRM_DEBUG("IH: SDMA trap\n");
7575                         switch (me_id) {
7576                         case 0:
7577                                 switch (queue_id) {
7578                                 case 0:
7579                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7580                                         break;
7581                                 case 1:
7582                                         /* XXX compute */
7583                                         break;
7584                                 case 2:
7585                                         /* XXX compute */
7586                                         break;
7587                                 }
7588                                 break;
7589                         case 1:
7590                                 switch (queue_id) {
7591                                 case 0:
7592                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7593                                         break;
7594                                 case 1:
7595                                         /* XXX compute */
7596                                         break;
7597                                 case 2:
7598                                         /* XXX compute */
7599                                         break;
7600                                 }
7601                                 break;
7602                         }
7603                         break;
7604                 case 230: /* thermal low to high */
7605                         DRM_DEBUG("IH: thermal low to high\n");
7606                         rdev->pm.dpm.thermal.high_to_low = false;
7607                         queue_thermal = true;
7608                         break;
7609                 case 231: /* thermal high to low */
7610                         DRM_DEBUG("IH: thermal high to low\n");
7611                         rdev->pm.dpm.thermal.high_to_low = true;
7612                         queue_thermal = true;
7613                         break;
7614                 case 233: /* GUI IDLE */
7615                         DRM_DEBUG("IH: GUI idle\n");
7616                         break;
7617                 case 241: /* SDMA Privileged inst */
7618                 case 247: /* SDMA Privileged inst */
7619                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7620                         /* XXX check the bitfield order! */
7621                         me_id = (ring_id & 0x3) >> 0;
7622                         queue_id = (ring_id & 0xc) >> 2;
7623                         switch (me_id) {
7624                         case 0:
7625                                 switch (queue_id) {
7626                                 case 0:
7627                                         queue_reset = true;
7628                                         break;
7629                                 case 1:
7630                                         /* XXX compute */
7631                                         queue_reset = true;
7632                                         break;
7633                                 case 2:
7634                                         /* XXX compute */
7635                                         queue_reset = true;
7636                                         break;
7637                                 }
7638                                 break;
7639                         case 1:
7640                                 switch (queue_id) {
7641                                 case 0:
7642                                         queue_reset = true;
7643                                         break;
7644                                 case 1:
7645                                         /* XXX compute */
7646                                         queue_reset = true;
7647                                         break;
7648                                 case 2:
7649                                         /* XXX compute */
7650                                         queue_reset = true;
7651                                         break;
7652                                 }
7653                                 break;
7654                         }
7655                         break;
7656                 default:
7657                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7658                         break;
7659                 }
7660
7661                 /* wptr/rptr are in bytes! */
7662                 rptr += 16;
7663                 rptr &= rdev->ih.ptr_mask;
7664         }
7665         if (queue_hotplug)
7666                 schedule_work(&rdev->hotplug_work);
7667         if (queue_reset)
7668                 schedule_work(&rdev->reset_work);
7669         if (queue_thermal)
7670                 schedule_work(&rdev->pm.dpm.thermal.work);
7671         rdev->ih.rptr = rptr;
7672         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7673         atomic_set(&rdev->ih.lock, 0);
7674
7675         /* make sure wptr hasn't changed while processing */
7676         wptr = cik_get_ih_wptr(rdev);
7677         if (wptr != rptr)
7678                 goto restart_ih;
7679
7680         return IRQ_HANDLED;
7681 }
7682
7683 /*
7684  * startup/shutdown callbacks
7685  */
7686 /**
7687  * cik_startup - program the asic to a functional state
7688  *
7689  * @rdev: radeon_device pointer
7690  *
7691  * Programs the asic to a functional state (CIK).
7692  * Called by cik_init() and cik_resume().
7693  * Returns 0 for success, error for failure.
7694  */
7695 static int cik_startup(struct radeon_device *rdev)
7696 {
7697         struct radeon_ring *ring;
7698         int r;
7699
7700         /* enable pcie gen2/3 link */
7701         cik_pcie_gen3_enable(rdev);
7702         /* enable aspm */
7703         cik_program_aspm(rdev);
7704
7705         /* scratch needs to be initialized before MC */
7706         r = r600_vram_scratch_init(rdev);
7707         if (r)
7708                 return r;
7709
7710         cik_mc_program(rdev);
7711
7712         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7713                 r = ci_mc_load_microcode(rdev);
7714                 if (r) {
7715                         DRM_ERROR("Failed to load MC firmware!\n");
7716                         return r;
7717                 }
7718         }
7719
7720         r = cik_pcie_gart_enable(rdev);
7721         if (r)
7722                 return r;
7723         cik_gpu_init(rdev);
7724
7725         /* allocate rlc buffers */
7726         if (rdev->flags & RADEON_IS_IGP) {
7727                 if (rdev->family == CHIP_KAVERI) {
7728                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7729                         rdev->rlc.reg_list_size =
7730                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7731                 } else {
7732                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7733                         rdev->rlc.reg_list_size =
7734                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7735                 }
7736         }
7737         rdev->rlc.cs_data = ci_cs_data;
7738         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7739         r = sumo_rlc_init(rdev);
7740         if (r) {
7741                 DRM_ERROR("Failed to init rlc BOs!\n");
7742                 return r;
7743         }
7744
7745         /* allocate wb buffer */
7746         r = radeon_wb_init(rdev);
7747         if (r)
7748                 return r;
7749
7750         /* allocate mec buffers */
7751         r = cik_mec_init(rdev);
7752         if (r) {
7753                 DRM_ERROR("Failed to init MEC BOs!\n");
7754                 return r;
7755         }
7756
7757         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7758         if (r) {
7759                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7760                 return r;
7761         }
7762
7763         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7764         if (r) {
7765                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7766                 return r;
7767         }
7768
7769         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7770         if (r) {
7771                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7772                 return r;
7773         }
7774
7775         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7776         if (r) {
7777                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7778                 return r;
7779         }
7780
7781         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7782         if (r) {
7783                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7784                 return r;
7785         }
7786
7787         r = radeon_uvd_resume(rdev);
7788         if (!r) {
7789                 r = uvd_v4_2_resume(rdev);
7790                 if (!r) {
7791                         r = radeon_fence_driver_start_ring(rdev,
7792                                                            R600_RING_TYPE_UVD_INDEX);
7793                         if (r)
7794                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7795                 }
7796         }
7797         if (r)
7798                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7799
7800         /* Enable IRQ */
7801         if (!rdev->irq.installed) {
7802                 r = radeon_irq_kms_init(rdev);
7803                 if (r)
7804                         return r;
7805         }
7806
7807         r = cik_irq_init(rdev);
7808         if (r) {
7809                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7810                 radeon_irq_kms_fini(rdev);
7811                 return r;
7812         }
7813         cik_irq_set(rdev);
7814
7815         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7816         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7817                              PACKET3(PACKET3_NOP, 0x3FFF));
7818         if (r)
7819                 return r;
7820
7821         /* set up the compute queues */
7822         /* type-2 packets are deprecated on MEC, use type-3 instead */
7823         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7824         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7825                              PACKET3(PACKET3_NOP, 0x3FFF));
7826         if (r)
7827                 return r;
7828         ring->me = 1; /* first MEC */
7829         ring->pipe = 0; /* first pipe */
7830         ring->queue = 0; /* first queue */
7831         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7832
7833         /* type-2 packets are deprecated on MEC, use type-3 instead */
7834         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7835         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7836                              PACKET3(PACKET3_NOP, 0x3FFF));
7837         if (r)
7838                 return r;
7839         /* dGPU only have 1 MEC */
7840         ring->me = 1; /* first MEC */
7841         ring->pipe = 0; /* first pipe */
7842         ring->queue = 1; /* second queue */
7843         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7844
7845         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7846         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7847                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7848         if (r)
7849                 return r;
7850
7851         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7852         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7853                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7854         if (r)
7855                 return r;
7856
7857         r = cik_cp_resume(rdev);
7858         if (r)
7859                 return r;
7860
7861         r = cik_sdma_resume(rdev);
7862         if (r)
7863                 return r;
7864
7865         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7866         if (ring->ring_size) {
7867                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7868                                      RADEON_CP_PACKET2);
7869                 if (!r)
7870                         r = uvd_v1_0_init(rdev);
7871                 if (r)
7872                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7873         }
7874
7875         r = radeon_ib_pool_init(rdev);
7876         if (r) {
7877                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7878                 return r;
7879         }
7880
7881         r = radeon_vm_manager_init(rdev);
7882         if (r) {
7883                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7884                 return r;
7885         }
7886
7887         r = dce6_audio_init(rdev);
7888         if (r)
7889                 return r;
7890
7891         return 0;
7892 }
7893
7894 /**
7895  * cik_resume - resume the asic to a functional state
7896  *
7897  * @rdev: radeon_device pointer
7898  *
7899  * Programs the asic to a functional state (CIK).
7900  * Called at resume.
7901  * Returns 0 for success, error for failure.
7902  */
7903 int cik_resume(struct radeon_device *rdev)
7904 {
7905         int r;
7906
7907         /* post card */
7908         atom_asic_init(rdev->mode_info.atom_context);
7909
7910         /* init golden registers */
7911         cik_init_golden_registers(rdev);
7912
7913         radeon_pm_resume(rdev);
7914
7915         rdev->accel_working = true;
7916         r = cik_startup(rdev);
7917         if (r) {
7918                 DRM_ERROR("cik startup failed on resume\n");
7919                 rdev->accel_working = false;
7920                 return r;
7921         }
7922
7923         return r;
7924
7925 }
7926
7927 /**
7928  * cik_suspend - suspend the asic
7929  *
7930  * @rdev: radeon_device pointer
7931  *
7932  * Bring the chip into a state suitable for suspend (CIK).
7933  * Called at suspend.
7934  * Returns 0 for success.
7935  */
7936 int cik_suspend(struct radeon_device *rdev)
7937 {
7938         radeon_pm_suspend(rdev);
7939         dce6_audio_fini(rdev);
7940         radeon_vm_manager_fini(rdev);
7941         cik_cp_enable(rdev, false);
7942         cik_sdma_enable(rdev, false);
7943         uvd_v1_0_fini(rdev);
7944         radeon_uvd_suspend(rdev);
7945         cik_fini_pg(rdev);
7946         cik_fini_cg(rdev);
7947         cik_irq_suspend(rdev);
7948         radeon_wb_disable(rdev);
7949         cik_pcie_gart_disable(rdev);
7950         return 0;
7951 }
7952
7953 /* Plan is to move initialization in that function and use
7954  * helper function so that radeon_device_init pretty much
7955  * do nothing more than calling asic specific function. This
7956  * should also allow to remove a bunch of callback function
7957  * like vram_info.
7958  */
7959 /**
7960  * cik_init - asic specific driver and hw init
7961  *
7962  * @rdev: radeon_device pointer
7963  *
7964  * Setup asic specific driver variables and program the hw
7965  * to a functional state (CIK).
7966  * Called at driver startup.
7967  * Returns 0 for success, errors for failure.
7968  */
7969 int cik_init(struct radeon_device *rdev)
7970 {
7971         struct radeon_ring *ring;
7972         int r;
7973
7974         /* Read BIOS */
7975         if (!radeon_get_bios(rdev)) {
7976                 if (ASIC_IS_AVIVO(rdev))
7977                         return -EINVAL;
7978         }
7979         /* Must be an ATOMBIOS */
7980         if (!rdev->is_atom_bios) {
7981                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7982                 return -EINVAL;
7983         }
7984         r = radeon_atombios_init(rdev);
7985         if (r)
7986                 return r;
7987
7988         /* Post card if necessary */
7989         if (!radeon_card_posted(rdev)) {
7990                 if (!rdev->bios) {
7991                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7992                         return -EINVAL;
7993                 }
7994                 DRM_INFO("GPU not posted. posting now...\n");
7995                 atom_asic_init(rdev->mode_info.atom_context);
7996         }
7997         /* init golden registers */
7998         cik_init_golden_registers(rdev);
7999         /* Initialize scratch registers */
8000         cik_scratch_init(rdev);
8001         /* Initialize surface registers */
8002         radeon_surface_init(rdev);
8003         /* Initialize clocks */
8004         radeon_get_clock_info(rdev->ddev);
8005
8006         /* Fence driver */
8007         r = radeon_fence_driver_init(rdev);
8008         if (r)
8009                 return r;
8010
8011         /* initialize memory controller */
8012         r = cik_mc_init(rdev);
8013         if (r)
8014                 return r;
8015         /* Memory manager */
8016         r = radeon_bo_init(rdev);
8017         if (r)
8018                 return r;
8019
8020         if (rdev->flags & RADEON_IS_IGP) {
8021                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8022                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8023                         r = cik_init_microcode(rdev);
8024                         if (r) {
8025                                 DRM_ERROR("Failed to load firmware!\n");
8026                                 return r;
8027                         }
8028                 }
8029         } else {
8030                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8031                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8032                     !rdev->mc_fw) {
8033                         r = cik_init_microcode(rdev);
8034                         if (r) {
8035                                 DRM_ERROR("Failed to load firmware!\n");
8036                                 return r;
8037                         }
8038                 }
8039         }
8040
8041         /* Initialize power management */
8042         radeon_pm_init(rdev);
8043
8044         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8045         ring->ring_obj = NULL;
8046         r600_ring_init(rdev, ring, 1024 * 1024);
8047
8048         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8049         ring->ring_obj = NULL;
8050         r600_ring_init(rdev, ring, 1024 * 1024);
8051         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8052         if (r)
8053                 return r;
8054
8055         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8056         ring->ring_obj = NULL;
8057         r600_ring_init(rdev, ring, 1024 * 1024);
8058         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8059         if (r)
8060                 return r;
8061
8062         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8063         ring->ring_obj = NULL;
8064         r600_ring_init(rdev, ring, 256 * 1024);
8065
8066         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8067         ring->ring_obj = NULL;
8068         r600_ring_init(rdev, ring, 256 * 1024);
8069
8070         r = radeon_uvd_init(rdev);
8071         if (!r) {
8072                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8073                 ring->ring_obj = NULL;
8074                 r600_ring_init(rdev, ring, 4096);
8075         }
8076
8077         rdev->ih.ring_obj = NULL;
8078         r600_ih_ring_init(rdev, 64 * 1024);
8079
8080         r = r600_pcie_gart_init(rdev);
8081         if (r)
8082                 return r;
8083
8084         rdev->accel_working = true;
8085         r = cik_startup(rdev);
8086         if (r) {
8087                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8088                 cik_cp_fini(rdev);
8089                 cik_sdma_fini(rdev);
8090                 cik_irq_fini(rdev);
8091                 sumo_rlc_fini(rdev);
8092                 cik_mec_fini(rdev);
8093                 radeon_wb_fini(rdev);
8094                 radeon_ib_pool_fini(rdev);
8095                 radeon_vm_manager_fini(rdev);
8096                 radeon_irq_kms_fini(rdev);
8097                 cik_pcie_gart_fini(rdev);
8098                 rdev->accel_working = false;
8099         }
8100
8101         /* Don't start up if the MC ucode is missing.
8102          * The default clocks and voltages before the MC ucode
8103          * is loaded are not suffient for advanced operations.
8104          */
8105         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8106                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8107                 return -EINVAL;
8108         }
8109
8110         return 0;
8111 }
8112
8113 /**
8114  * cik_fini - asic specific driver and hw fini
8115  *
8116  * @rdev: radeon_device pointer
8117  *
8118  * Tear down the asic specific driver variables and program the hw
8119  * to an idle state (CIK).
8120  * Called at driver unload.
8121  */
8122 void cik_fini(struct radeon_device *rdev)
8123 {
8124         radeon_pm_fini(rdev);
8125         cik_cp_fini(rdev);
8126         cik_sdma_fini(rdev);
8127         cik_fini_pg(rdev);
8128         cik_fini_cg(rdev);
8129         cik_irq_fini(rdev);
8130         sumo_rlc_fini(rdev);
8131         cik_mec_fini(rdev);
8132         radeon_wb_fini(rdev);
8133         radeon_vm_manager_fini(rdev);
8134         radeon_ib_pool_fini(rdev);
8135         radeon_irq_kms_fini(rdev);
8136         uvd_v1_0_fini(rdev);
8137         radeon_uvd_fini(rdev);
8138         cik_pcie_gart_fini(rdev);
8139         r600_vram_scratch_fini(rdev);
8140         radeon_gem_fini(rdev);
8141         radeon_fence_driver_fini(rdev);
8142         radeon_bo_fini(rdev);
8143         radeon_atombios_fini(rdev);
8144         kfree(rdev->bios);
8145         rdev->bios = NULL;
8146 }
8147
8148 void dce8_program_fmt(struct drm_encoder *encoder)
8149 {
8150         struct drm_device *dev = encoder->dev;
8151         struct radeon_device *rdev = dev->dev_private;
8152         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8153         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8154         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8155         int bpc = 0;
8156         u32 tmp = 0;
8157         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8158
8159         if (connector) {
8160                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8161                 bpc = radeon_get_monitor_bpc(connector);
8162                 dither = radeon_connector->dither;
8163         }
8164
8165         /* LVDS/eDP FMT is set up by atom */
8166         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8167                 return;
8168
8169         /* not needed for analog */
8170         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8171             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8172                 return;
8173
8174         if (bpc == 0)
8175                 return;
8176
8177         switch (bpc) {
8178         case 6:
8179                 if (dither == RADEON_FMT_DITHER_ENABLE)
8180                         /* XXX sort out optimal dither settings */
8181                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8182                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8183                 else
8184                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8185                 break;
8186         case 8:
8187                 if (dither == RADEON_FMT_DITHER_ENABLE)
8188                         /* XXX sort out optimal dither settings */
8189                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8190                                 FMT_RGB_RANDOM_ENABLE |
8191                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8192                 else
8193                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8194                 break;
8195         case 10:
8196                 if (dither == RADEON_FMT_DITHER_ENABLE)
8197                         /* XXX sort out optimal dither settings */
8198                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8199                                 FMT_RGB_RANDOM_ENABLE |
8200                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8201                 else
8202                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8203                 break;
8204         default:
8205                 /* not needed */
8206                 break;
8207         }
8208
8209         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8210 }
8211
/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
                                   struct radeon_crtc *radeon_crtc,
                                   struct drm_display_mode *mode)
{
        u32 tmp, buffer_alloc, i;
        u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
        /*
         * Line Buffer Setup
         * There are 6 line buffers, one for each display controllers.
         * There are 3 partitions per LB. Select the number of partitions
         * to enable based on the display width.  For display widths larger
         * than 4096, you need use to use 2 display controllers and combine
         * them using the stereo blender.
         */
        if (radeon_crtc->base.enabled && mode) {
                /* tmp selects the LB_MEMORY_CONFIG partition mode;
                 * buffer_alloc is the DMIF buffer count (IGPs get fewer) */
                if (mode->crtc_hdisplay < 1920) {
                        tmp = 1;
                        buffer_alloc = 2;
                } else if (mode->crtc_hdisplay < 2560) {
                        tmp = 2;
                        buffer_alloc = 2;
                } else if (mode->crtc_hdisplay < 4096) {
                        tmp = 0;
                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
                } else {
                        DRM_DEBUG_KMS("Mode too big for LB!\n");
                        tmp = 0;
                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
                }
        } else {
                /* crtc disabled: minimal config, no DMIF buffers */
                tmp = 1;
                buffer_alloc = 0;
        }

        WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
               LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

        WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
               DMIF_BUFFERS_ALLOCATED(buffer_alloc));
        /* poll until the hw acknowledges the DMIF buffer allocation
         * (bounded by usec_timeout, 1 us per iteration) */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
                    DMIF_BUFFERS_ALLOCATED_COMPLETED)
                        break;
                udelay(1);
        }

        if (radeon_crtc->base.enabled && mode) {
                /* translate partition mode back to LB size in pixels */
                switch (tmp) {
                case 0:
                default:
                        return 4096 * 2;
                case 1:
                        return 1920 * 2;
                case 2:
                        return 2560 * 2;
                }
        }

        /* controller not enabled, so no lb used */
        return 0;
}
8286
8287 /**
8288  * cik_get_number_of_dram_channels - get the number of dram channels
8289  *
8290  * @rdev: radeon_device pointer
8291  *
8292  * Look up the number of video ram channels (CIK).
8293  * Used for display watermark bandwidth calculations
8294  * Returns the number of dram channels
8295  */
8296 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8297 {
8298         u32 tmp = RREG32(MC_SHARED_CHMAP);
8299
8300         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8301         case 0:
8302         default:
8303                 return 1;
8304         case 1:
8305                 return 2;
8306         case 2:
8307                 return 4;
8308         case 3:
8309                 return 8;
8310         case 4:
8311                 return 3;
8312         case 5:
8313                 return 6;
8314         case 6:
8315                 return 10;
8316         case 7:
8317                 return 12;
8318         case 8:
8319                 return 16;
8320         }
8321 }
8322
/* inputs for the DCE8 display watermark calculations, one per head */
struct dce8_wm_params {
        u32 dram_channels; /* number of dram channels */
        u32 yclk;          /* bandwidth per dram data pin in kHz */
        u32 sclk;          /* engine clock in kHz */
        u32 disp_clk;      /* display clock in kHz */
        u32 src_width;     /* viewport width */
        u32 active_time;   /* active display time in ns */
        u32 blank_time;    /* blank time in ns */
        bool interlaced;    /* mode is interlaced */
        fixed20_12 vsc;    /* vertical scale ratio */
        u32 num_heads;     /* number of active crtcs */
        u32 bytes_per_pixel; /* bytes per pixel display + overlay */
        u32 lb_size;       /* line buffer allocated to pipe */
        u32 vtaps;         /* vertical scaler taps */
};
8338
8339 /**
8340  * dce8_dram_bandwidth - get the dram bandwidth
8341  *
8342  * @wm: watermark calculation data
8343  *
8344  * Calculate the raw dram bandwidth (CIK).
8345  * Used for display watermark bandwidth calculations
8346  * Returns the dram bandwidth in MBytes/s
8347  */
8348 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8349 {
8350         /* Calculate raw DRAM Bandwidth */
8351         fixed20_12 dram_efficiency; /* 0.7 */
8352         fixed20_12 yclk, dram_channels, bandwidth;
8353         fixed20_12 a;
8354
8355         a.full = dfixed_const(1000);
8356         yclk.full = dfixed_const(wm->yclk);
8357         yclk.full = dfixed_div(yclk, a);
8358         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8359         a.full = dfixed_const(10);
8360         dram_efficiency.full = dfixed_const(7);
8361         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8362         bandwidth.full = dfixed_mul(dram_channels, yclk);
8363         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8364
8365         return dfixed_trunc(bandwidth);
8366 }
8367
8368 /**
8369  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8370  *
8371  * @wm: watermark calculation data
8372  *
8373  * Calculate the dram bandwidth used for display (CIK).
8374  * Used for display watermark bandwidth calculations
8375  * Returns the dram bandwidth for display in MBytes/s
8376  */
8377 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8378 {
8379         /* Calculate DRAM Bandwidth and the part allocated to display. */
8380         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8381         fixed20_12 yclk, dram_channels, bandwidth;
8382         fixed20_12 a;
8383
8384         a.full = dfixed_const(1000);
8385         yclk.full = dfixed_const(wm->yclk);
8386         yclk.full = dfixed_div(yclk, a);
8387         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8388         a.full = dfixed_const(10);
8389         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8390         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8391         bandwidth.full = dfixed_mul(dram_channels, yclk);
8392         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8393
8394         return dfixed_trunc(bandwidth);
8395 }
8396
8397 /**
8398  * dce8_data_return_bandwidth - get the data return bandwidth
8399  *
8400  * @wm: watermark calculation data
8401  *
8402  * Calculate the data return bandwidth used for display (CIK).
8403  * Used for display watermark bandwidth calculations
8404  * Returns the data return bandwidth in MBytes/s
8405  */
8406 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8407 {
8408         /* Calculate the display Data return Bandwidth */
8409         fixed20_12 return_efficiency; /* 0.8 */
8410         fixed20_12 sclk, bandwidth;
8411         fixed20_12 a;
8412
8413         a.full = dfixed_const(1000);
8414         sclk.full = dfixed_const(wm->sclk);
8415         sclk.full = dfixed_div(sclk, a);
8416         a.full = dfixed_const(10);
8417         return_efficiency.full = dfixed_const(8);
8418         return_efficiency.full = dfixed_div(return_efficiency, a);
8419         a.full = dfixed_const(32);
8420         bandwidth.full = dfixed_mul(a, sclk);
8421         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8422
8423         return dfixed_trunc(bandwidth);
8424 }
8425
8426 /**
8427  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8428  *
8429  * @wm: watermark calculation data
8430  *
8431  * Calculate the dmif bandwidth used for display (CIK).
8432  * Used for display watermark bandwidth calculations
8433  * Returns the dmif bandwidth in MBytes/s
8434  */
8435 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8436 {
8437         /* Calculate the DMIF Request Bandwidth */
8438         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8439         fixed20_12 disp_clk, bandwidth;
8440         fixed20_12 a, b;
8441
8442         a.full = dfixed_const(1000);
8443         disp_clk.full = dfixed_const(wm->disp_clk);
8444         disp_clk.full = dfixed_div(disp_clk, a);
8445         a.full = dfixed_const(32);
8446         b.full = dfixed_mul(a, disp_clk);
8447
8448         a.full = dfixed_const(10);
8449         disp_clk_request_efficiency.full = dfixed_const(8);
8450         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8451
8452         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8453
8454         return dfixed_trunc(bandwidth);
8455 }
8456
8457 /**
8458  * dce8_available_bandwidth - get the min available bandwidth
8459  *
8460  * @wm: watermark calculation data
8461  *
8462  * Calculate the min available bandwidth used for display (CIK).
8463  * Used for display watermark bandwidth calculations
8464  * Returns the min available bandwidth in MBytes/s
8465  */
8466 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8467 {
8468         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8469         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8470         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8471         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8472
8473         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8474 }
8475
8476 /**
8477  * dce8_average_bandwidth - get the average available bandwidth
8478  *
8479  * @wm: watermark calculation data
8480  *
8481  * Calculate the average available bandwidth used for display (CIK).
8482  * Used for display watermark bandwidth calculations
8483  * Returns the average available bandwidth in MBytes/s
8484  */
8485 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8486 {
8487         /* Calculate the display mode Average Bandwidth
8488          * DisplayMode should contain the source and destination dimensions,
8489          * timing, etc.
8490          */
8491         fixed20_12 bpp;
8492         fixed20_12 line_time;
8493         fixed20_12 src_width;
8494         fixed20_12 bandwidth;
8495         fixed20_12 a;
8496
8497         a.full = dfixed_const(1000);
8498         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8499         line_time.full = dfixed_div(line_time, a);
8500         bpp.full = dfixed_const(wm->bytes_per_pixel);
8501         src_width.full = dfixed_const(wm->src_width);
8502         bandwidth.full = dfixed_mul(src_width, bpp);
8503         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8504         bandwidth.full = dfixed_div(bandwidth, line_time);
8505
8506         return dfixed_trunc(bandwidth);
8507 }
8508
8509 /**
8510  * dce8_latency_watermark - get the latency watermark
8511  *
8512  * @wm: watermark calculation data
8513  *
8514  * Calculate the latency watermark (CIK).
8515  * Used for display watermark bandwidth calculations
8516  * Returns the latency watermark in ns
8517  */
8518 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8519 {
8520         /* First calculate the latency in ns */
8521         u32 mc_latency = 2000; /* 2000 ns. */
8522         u32 available_bandwidth = dce8_available_bandwidth(wm);
8523         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8524         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8525         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8526         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8527                 (wm->num_heads * cursor_line_pair_return_time);
8528         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8529         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8530         u32 tmp, dmif_size = 12288;
8531         fixed20_12 a, b, c;
8532
8533         if (wm->num_heads == 0)
8534                 return 0;
8535
8536         a.full = dfixed_const(2);
8537         b.full = dfixed_const(1);
8538         if ((wm->vsc.full > a.full) ||
8539             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8540             (wm->vtaps >= 5) ||
8541             ((wm->vsc.full >= a.full) && wm->interlaced))
8542                 max_src_lines_per_dst_line = 4;
8543         else
8544                 max_src_lines_per_dst_line = 2;
8545
8546         a.full = dfixed_const(available_bandwidth);
8547         b.full = dfixed_const(wm->num_heads);
8548         a.full = dfixed_div(a, b);
8549
8550         b.full = dfixed_const(mc_latency + 512);
8551         c.full = dfixed_const(wm->disp_clk);
8552         b.full = dfixed_div(b, c);
8553
8554         c.full = dfixed_const(dmif_size);
8555         b.full = dfixed_div(c, b);
8556
8557         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8558
8559         b.full = dfixed_const(1000);
8560         c.full = dfixed_const(wm->disp_clk);
8561         b.full = dfixed_div(c, b);
8562         c.full = dfixed_const(wm->bytes_per_pixel);
8563         b.full = dfixed_mul(b, c);
8564
8565         lb_fill_bw = min(tmp, dfixed_trunc(b));
8566
8567         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8568         b.full = dfixed_const(1000);
8569         c.full = dfixed_const(lb_fill_bw);
8570         b.full = dfixed_div(c, b);
8571         a.full = dfixed_div(a, b);
8572         line_fill_time = dfixed_trunc(a);
8573
8574         if (line_fill_time < wm->active_time)
8575                 return latency;
8576         else
8577                 return latency + (line_fill_time - wm->active_time);
8578
8579 }
8580
8581 /**
8582  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8583  * average and available dram bandwidth
8584  *
8585  * @wm: watermark calculation data
8586  *
8587  * Check if the display average bandwidth fits in the display
8588  * dram bandwidth (CIK).
8589  * Used for display watermark bandwidth calculations
8590  * Returns true if the display fits, false if not.
8591  */
8592 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8593 {
8594         if (dce8_average_bandwidth(wm) <=
8595             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8596                 return true;
8597         else
8598                 return false;
8599 }
8600
8601 /**
8602  * dce8_average_bandwidth_vs_available_bandwidth - check
8603  * average and available bandwidth
8604  *
8605  * @wm: watermark calculation data
8606  *
8607  * Check if the display average bandwidth fits in the display
8608  * available bandwidth (CIK).
8609  * Used for display watermark bandwidth calculations
8610  * Returns true if the display fits, false if not.
8611  */
8612 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8613 {
8614         if (dce8_average_bandwidth(wm) <=
8615             (dce8_available_bandwidth(wm) / wm->num_heads))
8616                 return true;
8617         else
8618                 return false;
8619 }
8620
8621 /**
8622  * dce8_check_latency_hiding - check latency hiding
8623  *
8624  * @wm: watermark calculation data
8625  *
8626  * Check latency hiding (CIK).
8627  * Used for display watermark bandwidth calculations
8628  * Returns true if the display fits, false if not.
8629  */
8630 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8631 {
8632         u32 lb_partitions = wm->lb_size / wm->src_width;
8633         u32 line_time = wm->active_time + wm->blank_time;
8634         u32 latency_tolerant_lines;
8635         u32 latency_hiding;
8636         fixed20_12 a;
8637
8638         a.full = dfixed_const(1);
8639         if (wm->vsc.full > a.full)
8640                 latency_tolerant_lines = 1;
8641         else {
8642                 if (lb_partitions <= (wm->vtaps + 1))
8643                         latency_tolerant_lines = 1;
8644                 else
8645                         latency_tolerant_lines = 2;
8646         }
8647
8648         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8649
8650         if (dce8_latency_watermark(wm) <= latency_hiding)
8651                 return true;
8652         else
8653                 return false;
8654 }
8655
8656 /**
8657  * dce8_program_watermarks - program display watermarks
8658  *
8659  * @rdev: radeon_device pointer
8660  * @radeon_crtc: the selected display controller
8661  * @lb_size: line buffer size
8662  * @num_heads: number of display controllers in use
8663  *
8664  * Calculate and program the display watermarks for the
8665  * selected display controller (CIK).
8666  */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* mode->clock is in kHz, so 1000000/clock gives the pixel
		 * period in ns; line_time is clamped to fit the 16-bit
		 * LATENCY_HIGH_WATERMARK register field below */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* second arg false selects the high clock level
			 * (the low-clock path below passes true); dpm
			 * clocks are scaled by 10 to match current_* units */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		/* any scaling implies at least two vertical taps */
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* true selects the low dpm clock level */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
8788
8789 /**
8790  * dce8_bandwidth_update - program display watermarks
8791  *
8792  * @rdev: radeon_device pointer
8793  *
8794  * Calculate and program the display watermarks and line
8795  * buffer allocation (CIK).
8796  */
8797 void dce8_bandwidth_update(struct radeon_device *rdev)
8798 {
8799         struct drm_display_mode *mode = NULL;
8800         u32 num_heads = 0, lb_size;
8801         int i;
8802
8803         radeon_update_display_priority(rdev);
8804
8805         for (i = 0; i < rdev->num_crtc; i++) {
8806                 if (rdev->mode_info.crtcs[i]->base.enabled)
8807                         num_heads++;
8808         }
8809         for (i = 0; i < rdev->num_crtc; i++) {
8810                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8811                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8812                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8813         }
8814 }
8815
8816 /**
8817  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8818  *
8819  * @rdev: radeon_device pointer
8820  *
 * Fetches a GPU clock counter snapshot (CIK).
8822  * Returns the 64 bit clock counter snapshot.
8823  */
8824 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8825 {
8826         uint64_t clock;
8827
8828         mutex_lock(&rdev->gpu_clock_mutex);
8829         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8830         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8831                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8832         mutex_unlock(&rdev->gpu_clock_mutex);
8833         return clock;
8834 }
8835
8836 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8837                               u32 cntl_reg, u32 status_reg)
8838 {
8839         int r, i;
8840         struct atom_clock_dividers dividers;
8841         uint32_t tmp;
8842
8843         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8844                                            clock, false, &dividers);
8845         if (r)
8846                 return r;
8847
8848         tmp = RREG32_SMC(cntl_reg);
8849         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8850         tmp |= dividers.post_divider;
8851         WREG32_SMC(cntl_reg, tmp);
8852
8853         for (i = 0; i < 100; i++) {
8854                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8855                         break;
8856                 mdelay(10);
8857         }
8858         if (i == 100)
8859                 return -ETIMEDOUT;
8860
8861         return 0;
8862 }
8863
8864 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8865 {
8866         int r = 0;
8867
8868         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8869         if (r)
8870                 return r;
8871
8872         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8873         return r;
8874 }
8875
/**
 * cik_pcie_gen3_enable - try to raise the PCIE link speed
 *
 * @rdev: radeon_device pointer
 *
 * Attempts to switch the PCIE link to gen2/gen3 speeds depending on
 * what the root port advertises, retraining the link with equalization
 * retries for gen3.  Does nothing for IGPs, non-PCIE parts, or when
 * disabled via radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* module parameter kill switch */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* what speeds does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* data rate encoding: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both ends of the link */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save link control on both sides so HAWD can be
			 * restored after the retraining loop */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back up to the detected maximum
			 * if renegotiation is supported */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* target link speed in LNKCTL2 low nibble: 1 = gen1, 2 = gen2, 3 = gen3 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the hw to clear the bit */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9032
/**
 * cik_program_aspm - configure PCIE ASPM (L0s/L1) behaviour
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIE link power-management related registers (L0s/L1
 * inactivity, PLL powerdown in L1, clkreq handling).  The disable_*
 * knobs are currently hard-wired false here, so the L0s/L1 paths are
 * always taken.  Does nothing for IGPs, non-PCIE parts, or when
 * disabled via radeon.aspm=0.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* all four knobs are fixed false in this implementation */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* module parameter kill switch */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* each register below is read-modify-written only when it changes
	 * (orig != data) to avoid redundant PCIE port writes */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF PLLs to power down in OFF/TXS2 states
			 * on both pads (PB0/PB1) */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* clkreq only usable if the root port
				 * advertises clock power management */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch misc clocks off the refclk so it can gate */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just commit the L0s/PMI settings */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the N_FTS field is saturated and the link is reversed
		 * in both directions, drop the L0s inactivity setting */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}